(git:d18deda)
Loading...
Searching...
No Matches
cp_fm_types.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief represent a full matrix distributed on many processors
10!> \par History
11!> 3) separated structure object, removed globenv, renamed to full matrix
12!> many changes (fawzi 08.2002)
13!> \author Matthias Krack (22.05.2001)
14! **************************************************************************************************
24 USE kinds, ONLY: dp,&
25 sp
33 USE parallel_rng_types, ONLY: uniform,&
35#include "../base/base_uses.f90"
36
37 IMPLICIT NONE
38
39 PRIVATE
40
41 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
42 LOGICAL, PARAMETER :: debug_this_module = .true.
43 INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
44
45 INTEGER, PRIVATE :: cp_fm_mm_type = 1
46
47 PUBLIC :: cp_fm_type, &
49
50 PUBLIC :: cp_fm_add_to_element, &
56 cp_fm_get_diag, & ! get diagonal
57 cp_fm_set_all, & ! set all elements and diagonal
58 cp_fm_set_submatrix, & ! set a submatrix to given values
59 cp_fm_get_submatrix, & ! get a submatrix of given values
61 cp_fm_maxabsval, & ! find the maximum absolute value
62 cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
63 cp_fm_to_fm, & ! copy (parts of) a fm to a fm
64 cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
65 cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
66 cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
72 cp_fm_write_unformatted, & ! writes a full matrix to an open unit
73 cp_fm_write_formatted, & ! writes a full matrix to an open unit
74 cp_fm_read_unformatted, & ! reads a full matrix from an open unit
75 cp_fm_setup, & ! allows to set flags for fms
78 cp_fm_to_fm_submat_general ! copy matrix across different contexts
79
80 PUBLIC :: cp_fm_pilaenv
81
82 INTERFACE cp_fm_to_fm
83 MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
84 cp_fm_to_fm_columns ! just a number of columns
85 END INTERFACE
86
87 INTERFACE cp_fm_release
88 MODULE PROCEDURE cp_fm_release_aa0, &
89 cp_fm_release_aa1, &
90 cp_fm_release_aa2, &
91 cp_fm_release_aa3, &
92 cp_fm_release_ap1, &
93 cp_fm_release_ap2, &
94 cp_fm_release_pa1, &
95 cp_fm_release_pa2, &
96 cp_fm_release_pa3, &
97 cp_fm_release_pp1, &
98 cp_fm_release_pp2
99 END INTERFACE
100
101! **************************************************************************************************
102!> \brief represent a full matrix
103!> \param name the name of the matrix, used for printing
104!> \param matrix_struct structure of this matrix
105!> \param local_data array with the data of the matrix (its contents
106!> depend on the matrix type used: in parallel runs it will be
107!> in scalapack format, in sequential, it will simply contain
108!> the matrix)
109!> \par History
110!> 08.2002 created [fawzi]
111!> \author fawzi
112! **************************************************************************************************
114! PRIVATE
115 CHARACTER(LEN=60) :: name = ""
116 LOGICAL :: use_sp = .false.
117 TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
118 REAL(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
119 REAL(kind=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => null()
120 END TYPE cp_fm_type
121
122! **************************************************************************************************
123!> \brief just to build arrays of pointers to matrices
124!> \param matrix the pointer to the matrix
125!> \par History
126!> 08.2002 created [fawzi]
127!> \author fawzi
128! **************************************************************************************************
130 TYPE(cp_fm_type), POINTER :: matrix => null()
131 END TYPE cp_fm_p_type
132
133! **************************************************************************************************
134!> \brief Stores the state of a copy between cp_fm_start_copy_general
135!> and cp_fm_finish_copy_general
136!> \par History
137!> Jan 2017 [Mark T]
138! **************************************************************************************************
140 INTEGER :: send_size = -1
141 INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
142 TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
143 INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
144 INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
145 INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
146 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
147 END TYPE copy_info_type
148
149CONTAINS
150
151! **************************************************************************************************
152!> \brief creates a new full matrix with the given structure
153!> \param matrix the matrix to be created
154!> \param matrix_struct the structure of matrix
155!> \param name ...
156!> \param use_sp ...
157!> \par History
158!> 08.2002 created [fawzi]
159!> \author Fawzi Mohamed
160!> \note
161!> preferred allocation routine
162! **************************************************************************************************
163 SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp)
164 TYPE(cp_fm_type), INTENT(OUT) :: matrix
165 TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
166 CHARACTER(LEN=*), INTENT(in), OPTIONAL :: name
167 LOGICAL, INTENT(in), OPTIONAL :: use_sp
168
169 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_create'
170
171 INTEGER :: handle, ncol_local, npcol, nprow, &
172 nrow_local
173 TYPE(cp_blacs_env_type), POINTER :: context
174
175 CALL timeset(routinen, handle)
176
177 context => matrix_struct%context
178 matrix%matrix_struct => matrix_struct
179 CALL cp_fm_struct_retain(matrix%matrix_struct)
180
181 matrix%use_sp = .false.
182 IF (PRESENT(use_sp)) matrix%use_sp = use_sp
183
184 nprow = context%num_pe(1)
185 npcol = context%num_pe(2)
186 NULLIFY (matrix%local_data)
187 NULLIFY (matrix%local_data_sp)
188
189 ! OK, we allocate here at least a 1 x 1 matrix
190 ! this must (and is) compatible with the descinit call
191 ! in cp_fm_struct
192 nrow_local = matrix_struct%local_leading_dimension
193 ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
194 IF (matrix%use_sp) THEN
195 ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
196 ELSE
197 ALLOCATE (matrix%local_data(nrow_local, ncol_local))
198 END IF
199
200 ! JVDV we should remove this, as it is up to the user to zero afterwards
201 IF (matrix%use_sp) THEN
202 matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
203 ELSE
204 matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
205 END IF
206
207 IF (PRESENT(name)) THEN
208 matrix%name = name
209 ELSE
210 matrix%name = 'full matrix'
211 END IF
212
213 CALL timestop(handle)
214
215 END SUBROUTINE cp_fm_create
216
217! **************************************************************************************************
218!> \brief releases a full matrix
219!> \param matrix the matrix to release
220!> \par History
221!> 08.2002 created [fawzi]
222!> \author Fawzi Mohamed
223! **************************************************************************************************
224 SUBROUTINE cp_fm_release_aa0(matrix)
225 TYPE(cp_fm_type), INTENT(INOUT) :: matrix
226
227 IF (ASSOCIATED(matrix%local_data)) THEN
228 DEALLOCATE (matrix%local_data)
229 NULLIFY (matrix%local_data)
230 END IF
231 IF (ASSOCIATED(matrix%local_data_sp)) THEN
232 DEALLOCATE (matrix%local_data_sp)
233 NULLIFY (matrix%local_data_sp)
234 END IF
235 matrix%name = ""
236 CALL cp_fm_struct_release(matrix%matrix_struct)
237
238 END SUBROUTINE cp_fm_release_aa0
239
240! **************************************************************************************************
241!> \brief ...
242!> \param matrices ...
243! **************************************************************************************************
244 SUBROUTINE cp_fm_release_aa1(matrices)
245 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
246
247 INTEGER :: i
248
249 IF (ALLOCATED(matrices)) THEN
250 DO i = 1, SIZE(matrices)
251 CALL cp_fm_release(matrices(i))
252 END DO
253 DEALLOCATE (matrices)
254 END IF
255 END SUBROUTINE cp_fm_release_aa1
256
257! **************************************************************************************************
258!> \brief ...
259!> \param matrices ...
260! **************************************************************************************************
261 SUBROUTINE cp_fm_release_aa2(matrices)
262 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
263
264 INTEGER :: i, j
265
266 IF (ALLOCATED(matrices)) THEN
267 DO i = 1, SIZE(matrices, 1)
268 DO j = 1, SIZE(matrices, 2)
269 CALL cp_fm_release(matrices(i, j))
270 END DO
271 END DO
272 DEALLOCATE (matrices)
273 END IF
274 END SUBROUTINE cp_fm_release_aa2
275
276! **************************************************************************************************
277!> \brief ...
278!> \param matrices ...
279! **************************************************************************************************
280 SUBROUTINE cp_fm_release_aa3(matrices)
281 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
282
283 INTEGER :: i, j, k
284
285 IF (ALLOCATED(matrices)) THEN
286 DO i = 1, SIZE(matrices, 1)
287 DO j = 1, SIZE(matrices, 2)
288 DO k = 1, SIZE(matrices, 3)
289 CALL cp_fm_release(matrices(i, j, k))
290 END DO
291 END DO
292 END DO
293 DEALLOCATE (matrices)
294 END IF
295 END SUBROUTINE cp_fm_release_aa3
296
297! **************************************************************************************************
298!> \brief ...
299!> \param matrices ...
300! **************************************************************************************************
301 SUBROUTINE cp_fm_release_pa1(matrices)
302 TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
303
304 INTEGER :: i
305
306 IF (ASSOCIATED(matrices)) THEN
307 DO i = 1, SIZE(matrices)
308 CALL cp_fm_release(matrices(i))
309 END DO
310 DEALLOCATE (matrices)
311 NULLIFY (matrices)
312 END IF
313 END SUBROUTINE cp_fm_release_pa1
314
315! **************************************************************************************************
316!> \brief ...
317!> \param matrices ...
318! **************************************************************************************************
319 SUBROUTINE cp_fm_release_pa2(matrices)
320 TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
321
322 INTEGER :: i, j
323
324 IF (ASSOCIATED(matrices)) THEN
325 DO i = 1, SIZE(matrices, 1)
326 DO j = 1, SIZE(matrices, 2)
327 CALL cp_fm_release(matrices(i, j))
328 END DO
329 END DO
330 DEALLOCATE (matrices)
331 NULLIFY (matrices)
332 END IF
333 END SUBROUTINE cp_fm_release_pa2
334
335! **************************************************************************************************
336!> \brief ...
337!> \param matrices ...
338! **************************************************************************************************
339 SUBROUTINE cp_fm_release_pa3(matrices)
340 TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
341
342 INTEGER :: i, j, k
343
344 IF (ASSOCIATED(matrices)) THEN
345 DO i = 1, SIZE(matrices, 1)
346 DO j = 1, SIZE(matrices, 2)
347 DO k = 1, SIZE(matrices, 3)
348 CALL cp_fm_release(matrices(i, j, k))
349 END DO
350 END DO
351 END DO
352 DEALLOCATE (matrices)
353 NULLIFY (matrices)
354 END IF
355 END SUBROUTINE cp_fm_release_pa3
356
357! **************************************************************************************************
358!> \brief ...
359!> \param matrices ...
360! **************************************************************************************************
361 SUBROUTINE cp_fm_release_ap1(matrices)
362 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
363
364 INTEGER :: i
365
366 IF (ALLOCATED(matrices)) THEN
367 DO i = 1, SIZE(matrices)
368 CALL cp_fm_release(matrices(i)%matrix)
369 DEALLOCATE (matrices(i)%matrix)
370 END DO
371 DEALLOCATE (matrices)
372 END IF
373 END SUBROUTINE cp_fm_release_ap1
374
375! **************************************************************************************************
376!> \brief ...
377!> \param matrices ...
378! **************************************************************************************************
379 SUBROUTINE cp_fm_release_ap2(matrices)
380 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
381
382 INTEGER :: i, j
383
384 IF (ALLOCATED(matrices)) THEN
385 DO i = 1, SIZE(matrices, 1)
386 DO j = 1, SIZE(matrices, 2)
387 CALL cp_fm_release(matrices(i, j)%matrix)
388 DEALLOCATE (matrices(i, j)%matrix)
389 END DO
390 END DO
391 DEALLOCATE (matrices)
392 END IF
393 END SUBROUTINE cp_fm_release_ap2
394
395! **************************************************************************************************
396!> \brief ...
397!> \param matrices ...
398! **************************************************************************************************
399 SUBROUTINE cp_fm_release_pp1(matrices)
400 TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
401
402 INTEGER :: i
403
404 IF (ASSOCIATED(matrices)) THEN
405 DO i = 1, SIZE(matrices)
406 CALL cp_fm_release(matrices(i)%matrix)
407 DEALLOCATE (matrices(i)%matrix)
408 END DO
409 DEALLOCATE (matrices)
410 NULLIFY (matrices)
411 END IF
412 END SUBROUTINE cp_fm_release_pp1
413
414! **************************************************************************************************
415!> \brief ...
416!> \param matrices ...
417! **************************************************************************************************
418 SUBROUTINE cp_fm_release_pp2(matrices)
419 TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
420
421 INTEGER :: i, j
422
423 IF (ASSOCIATED(matrices)) THEN
424 DO i = 1, SIZE(matrices, 1)
425 DO j = 1, SIZE(matrices, 2)
426 CALL cp_fm_release(matrices(i, j)%matrix)
427 DEALLOCATE (matrices(i, j)%matrix)
428 END DO
429 END DO
430 DEALLOCATE (matrices)
431 NULLIFY (matrices)
432 END IF
433 END SUBROUTINE cp_fm_release_pp2
434
435! **************************************************************************************************
436!> \brief fills a matrix with random numbers
437!> \param matrix : to be initialized
438!> \param ncol : numbers of cols to fill
439!> \param start_col : starting at coll number
440!> \author Joost VandeVondele
441!> \note
442!> the value of a_ij is independent of the number of cpus
443! **************************************************************************************************
444 SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
445 TYPE(cp_fm_type), INTENT(IN) :: matrix
446 INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
447
448 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_init_random'
449
450 INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
451 ncol_local, nrow_global, nrow_local
452 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
453 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: buff
454 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
455 POINTER :: local_data
456 REAL(kind=dp), DIMENSION(3, 2), SAVE :: &
457 seed = reshape((/1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp/), (/3, 2/))
458 TYPE(rng_stream_type) :: rng
459
460 CALL timeset(routinen, handle)
461
462 ! guarantee same seed over all tasks
463 CALL matrix%matrix_struct%para_env%bcast(seed, 0)
464
465 rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=uniform, &
466 extended_precision=.true., seed=seed)
467
468 cpassert(.NOT. matrix%use_sp)
469
470 CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
471 nrow_local=nrow_local, ncol_local=ncol_local, &
472 local_data=local_data, &
473 row_indices=row_indices, col_indices=col_indices)
474
475 my_start_col = 1
476 IF (PRESENT(start_col)) my_start_col = start_col
477 my_ncol = matrix%matrix_struct%ncol_global
478 IF (PRESENT(ncol)) my_ncol = ncol
479
480 IF (ncol_global < (my_start_col + my_ncol - 1)) &
481 cpabort("ncol_global>=(my_start_col+my_ncol-1)")
482
483 ALLOCATE (buff(nrow_global))
484
485 ! each global row has its own substream, in order to reach the stream for the local col,
486 ! we just reset to the next substream
487 ! following this, we fill the full buff with random numbers, and pick those we need
488 icol_global = 0
489 DO icol_local = 1, ncol_local
490 cpassert(col_indices(icol_local) > icol_global)
491 DO
492 CALL rng%reset_to_next_substream()
493 icol_global = icol_global + 1
494 IF (icol_global == col_indices(icol_local)) EXIT
495 END DO
496 CALL rng%fill(buff)
497 DO irow_local = 1, nrow_local
498 local_data(irow_local, icol_local) = buff(row_indices(irow_local))
499 END DO
500 END DO
501
502 DEALLOCATE (buff)
503
504 ! store seed before deletion (unclear if this is the proper seed)
505
506 ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
507 ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
508 ! stream of random numbers. While this seems odd and contrary to the original design,
509 ! it was probably introduced to improve reproducibility.
510 ! See also https://github.com/cp2k/cp2k/pull/506
511 CALL rng%get(ig=seed)
512
513 CALL timestop(handle)
514
515 END SUBROUTINE cp_fm_init_random
516
517! **************************************************************************************************
518!> \brief set all elements of a matrix to the same value,
519!> and optionally the diagonal to a different one
520!> \param matrix input matrix
521!> \param alpha scalar used to set all elements of the matrix
522!> \param beta scalar used to set diagonal of the matrix
523!> \note
524!> can be used to zero a matrix
525!> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
526! **************************************************************************************************
527 SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
528
529 TYPE(cp_fm_type), INTENT(IN) :: matrix
530 REAL(kind=dp), INTENT(IN) :: alpha
531 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
532
533 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_set_all'
534
535 INTEGER :: handle, i, n
536
537 CALL timeset(routinen, handle)
538
539 IF (matrix%use_sp) THEN
540 matrix%local_data_sp(:, :) = real(alpha, sp)
541 ELSE
542 matrix%local_data(:, :) = alpha
543 END IF
544
545 IF (PRESENT(beta)) THEN
546 cpassert(.NOT. matrix%use_sp)
547 n = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
548 DO i = 1, n
549 CALL cp_fm_set_element(matrix, i, i, beta)
550 END DO
551 END IF
552
553 CALL timestop(handle)
554
555 END SUBROUTINE cp_fm_set_all
556
557! **************************************************************************************************
558!> \brief returns the diagonal elements of a fm
559!> \param matrix ...
560!> \param diag ...
561! **************************************************************************************************
562 SUBROUTINE cp_fm_get_diag(matrix, diag)
563
564 ! arguments
565 TYPE(cp_fm_type), INTENT(IN) :: matrix
566 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: diag
567
568 ! locals
569 INTEGER :: i, nrow_global
570
571#if defined(__parallel)
572 INTEGER, DIMENSION(9) :: desca
573 TYPE(cp_blacs_env_type), POINTER :: context
574 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
575 nprow
576 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
577 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
578#endif
579
580 CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
581
582#if defined(__parallel)
583 diag = 0.0_dp
584 context => matrix%matrix_struct%context
585 myprow = context%mepos(1)
586 mypcol = context%mepos(2)
587 nprow = context%num_pe(1)
588 npcol = context%num_pe(2)
589
590 a => matrix%local_data
591 a_sp => matrix%local_data_sp
592 desca(:) = matrix%matrix_struct%descriptor(:)
593
594 DO i = 1, nrow_global
595 CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
596 irow_local, icol_local, iprow, ipcol)
597 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
598 IF (matrix%use_sp) THEN
599 diag(i) = real(a_sp(irow_local, icol_local), dp)
600 ELSE
601 diag(i) = a(irow_local, icol_local)
602 END IF
603 END IF
604 END DO
605#else
606 DO i = 1, nrow_global
607 IF (matrix%use_sp) THEN
608 diag(i) = real(matrix%local_data_sp(i, i), dp)
609 ELSE
610 diag(i) = matrix%local_data(i, i)
611 END IF
612 END DO
613#endif
614 CALL matrix%matrix_struct%para_env%sum(diag)
615
616 END SUBROUTINE cp_fm_get_diag
617
618! **************************************************************************************************
619!> \brief returns an element of a fm
620!> this value is valid on every cpu
621!> using this call is expensive
622!> \param matrix the matrix to read
623!> \param irow_global the row
624!> \param icol_global the col
625!> \param alpha the value of matrix(irow_global, icol_global)
626!> \param local true if the element is on this cpu, false otherwise
627!> \note
628!> - modified semantics. now this function always returns the value
629!> previously the value was zero on cpus that didn't own the relevant
630!> part of the matrix (Joost VandeVondele, May 2003)
631!> - usage of the function should be avoided, as it is likely to rather slow
632!> using row_indices/col_indices/local_data + some smart scheme normally
633!> yields a real parallel code
634! **************************************************************************************************
635 SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
636
637 ! arguments
638 TYPE(cp_fm_type), INTENT(IN) :: matrix
639 REAL(kind=dp), INTENT(OUT) :: alpha
640 INTEGER, INTENT(IN) :: icol_global, &
641 irow_global
642 LOGICAL, INTENT(OUT), OPTIONAL :: local
643
644 ! locals
645#if defined(__parallel)
646 INTEGER, DIMENSION(9) :: desca
647 TYPE(cp_blacs_env_type), POINTER :: context
648 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
649 nprow
650 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
651#endif
652
653#if defined(__parallel)
654 context => matrix%matrix_struct%context
655 myprow = context%mepos(1)
656 mypcol = context%mepos(2)
657 nprow = context%num_pe(1)
658 npcol = context%num_pe(2)
659
660 a => matrix%local_data
661 desca(:) = matrix%matrix_struct%descriptor(:)
662
663 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
664 irow_local, icol_local, iprow, ipcol)
665
666 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
667 alpha = a(irow_local, icol_local)
668 CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
669 IF (PRESENT(local)) local = .true.
670 ELSE
671 CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
672 IF (PRESENT(local)) local = .false.
673 END IF
674
675#else
676 IF (PRESENT(local)) local = .true.
677 alpha = matrix%local_data(irow_global, icol_global)
678#endif
679
680 END SUBROUTINE cp_fm_get_element
681
682! **************************************************************************************************
683!> \brief sets an element of a matrix
684!> \param matrix ...
685!> \param irow_global ...
686!> \param icol_global ...
687!> \param alpha ...
688!> \note
689!> we expect all cpus to have the same arguments in the call to this function
690!> (otherwise one should use local_data tricks)
691! **************************************************************************************************
692 SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
693 TYPE(cp_fm_type), INTENT(IN) :: matrix
694 INTEGER, INTENT(IN) :: irow_global, icol_global
695 REAL(kind=dp), INTENT(IN) :: alpha
696
697 INTEGER :: mypcol, myprow, npcol, nprow
698 TYPE(cp_blacs_env_type), POINTER :: context
699#if defined(__parallel)
700 INTEGER :: icol_local, ipcol, iprow, &
701 irow_local
702 INTEGER, DIMENSION(9) :: desca
703 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
704#endif
705
706 context => matrix%matrix_struct%context
707 myprow = context%mepos(1)
708 mypcol = context%mepos(2)
709 nprow = context%num_pe(1)
710 npcol = context%num_pe(2)
711
712 cpassert(.NOT. matrix%use_sp)
713
714#if defined(__parallel)
715
716 a => matrix%local_data
717
718 desca(:) = matrix%matrix_struct%descriptor(:)
719
720 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
721 irow_local, icol_local, iprow, ipcol)
722
723 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
724 a(irow_local, icol_local) = alpha
725 END IF
726
727#else
728
729 matrix%local_data(irow_global, icol_global) = alpha
730
731#endif
732 END SUBROUTINE cp_fm_set_element
733
734! **************************************************************************************************
735!> \brief sets a submatrix of a full matrix
736!> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
737!> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
738!> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
739!> \param fm the full to change
740!> \param new_values a replicated full matrix with the new values
741!> \param start_row the starting row of b_matrix (defaults to 1)
742!> \param start_col the starting col of b_matrix (defaults to 1)
743!> \param n_rows the number of row to change in b (defaults to
744!> size(op(new_values),1))
745!> \param n_cols the number of columns to change in b (defaults to
746!> size(op(new_values),2))
747!> \param alpha rescaling factor for the new values (defaults to 1.0)
748!> \param beta rescaling factor for the old values (defaults to 0.0)
749!> \param transpose if new_values should be transposed: if true
750!> op(new_values)=new_values^T, else op(new_values)=new_values
751!> (defaults to false)
752!> \par History
753!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
754!> \author Fawzi Mohamed
755!> \note
756!> optimized for full column updates and alpha=1.0, beta=0.0
757!> the new_values need to be valid on all cpus
758! **************************************************************************************************
759 SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
760 start_col, n_rows, n_cols, alpha, beta, transpose)
761 TYPE(cp_fm_type), INTENT(IN) :: fm
762 REAL(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
763 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
764 REAL(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
765 LOGICAL, INTENT(in), OPTIONAL :: transpose
766
767 INTEGER :: i, i0, j, j0, ncol, ncol_block, &
768 ncol_global, ncol_local, nrow, &
769 nrow_block, nrow_global, nrow_local, &
770 this_col, this_row
771 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
772 LOGICAL :: tr_a
773 REAL(kind=dp) :: al, be
774 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
775
776 al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .false.
777 ! can be called too many times, making it a bit useless
778 ! CALL timeset(routineN//','//moduleN,handle)
779
780 cpassert(.NOT. fm%use_sp)
781
782 IF (PRESENT(alpha)) al = alpha
783 IF (PRESENT(beta)) be = beta
784 IF (PRESENT(start_row)) i0 = start_row
785 IF (PRESENT(start_col)) j0 = start_col
786 IF (PRESENT(transpose)) tr_a = transpose
787 IF (tr_a) THEN
788 nrow = SIZE(new_values, 2)
789 ncol = SIZE(new_values, 1)
790 ELSE
791 nrow = SIZE(new_values, 1)
792 ncol = SIZE(new_values, 2)
793 END IF
794 IF (PRESENT(n_rows)) nrow = n_rows
795 IF (PRESENT(n_cols)) ncol = n_cols
796
797 full_block => fm%local_data
798
799 CALL cp_fm_get_info(matrix=fm, &
800 nrow_global=nrow_global, ncol_global=ncol_global, &
801 nrow_block=nrow_block, ncol_block=ncol_block, &
802 nrow_local=nrow_local, ncol_local=ncol_local, &
803 row_indices=row_indices, col_indices=col_indices)
804
805 IF (al == 1.0 .AND. be == 0.0) THEN
806 DO j = 1, ncol_local
807 this_col = col_indices(j) - j0 + 1
808 IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
809 IF (tr_a) THEN
810 IF (i0 == 1 .AND. nrow_global == nrow) THEN
811 DO i = 1, nrow_local
812 full_block(i, j) = new_values(this_col, row_indices(i))
813 END DO
814 ELSE
815 DO i = 1, nrow_local
816 this_row = row_indices(i) - i0 + 1
817 IF (this_row >= 1 .AND. this_row <= nrow) THEN
818 full_block(i, j) = new_values(this_col, this_row)
819 END IF
820 END DO
821 END IF
822 ELSE
823 IF (i0 == 1 .AND. nrow_global == nrow) THEN
824 DO i = 1, nrow_local
825 full_block(i, j) = new_values(row_indices(i), this_col)
826 END DO
827 ELSE
828 DO i = 1, nrow_local
829 this_row = row_indices(i) - i0 + 1
830 IF (this_row >= 1 .AND. this_row <= nrow) THEN
831 full_block(i, j) = new_values(this_row, this_col)
832 END IF
833 END DO
834 END IF
835 END IF
836 END IF
837 END DO
838 ELSE
839 DO j = 1, ncol_local
840 this_col = col_indices(j) - j0 + 1
841 IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
842 IF (tr_a) THEN
843 DO i = 1, nrow_local
844 this_row = row_indices(i) - i0 + 1
845 IF (this_row >= 1 .AND. this_row <= nrow) THEN
846 full_block(i, j) = al*new_values(this_col, this_row) + &
847 be*full_block(i, j)
848 END IF
849 END DO
850 ELSE
851 DO i = 1, nrow_local
852 this_row = row_indices(i) - i0 + 1
853 IF (this_row >= 1 .AND. this_row <= nrow) THEN
854 full_block(i, j) = al*new_values(this_row, this_col) + &
855 be*full_block(i, j)
856 END IF
857 END DO
858 END IF
859 END IF
860 END DO
861 END IF
862
863 ! CALL timestop(handle)
864
865 END SUBROUTINE cp_fm_set_submatrix
866
867! **************************************************************************************************
868!> \brief gets a submatrix of a full matrix
869!> op(target_m)(1:n_rows,1:n_cols)
870!> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
871!> target_m is replicated on all cpus
872!> using this call is expensive
873!> \param fm the full you want to get the info from
874!> \param target_m a replicated full matrix that will contain the result
875!> \param start_row the starting row of b_matrix (defaults to 1)
876!> \param start_col the starting col of b_matrix (defaults to 1)
877!> \param n_rows the number of row to change in b (defaults to
878!> size(op(new_values),1))
879!> \param n_cols the number of columns to change in b (defaults to
880!> size(op(new_values),2))
881!> \param transpose if target_m should be transposed: if true
882!> op(target_m)=target_m^T, else op(target_m)=target_m
883!> (defaults to false)
884!> \par History
885!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
886!> \author Fawzi Mohamed
887!> \note
888!> optimized for full column updates. Zeros out a little too much
889!> of target_m
890!> the target_m is replicated and valid on all cpus
891! **************************************************************************************************
892 SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
893 start_col, n_rows, n_cols, transpose)
894 TYPE(cp_fm_type), INTENT(IN) :: fm
895 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
896 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
897 LOGICAL, INTENT(in), OPTIONAL :: transpose
898
899 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_get_submatrix'
900
901 INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
902 ncol_local, nrow, nrow_global, &
903 nrow_local, this_col, this_row
904 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
905 LOGICAL :: tr_a
906 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
907 TYPE(mp_para_env_type), POINTER :: para_env
908
909 CALL timeset(routinen, handle)
910
911 i0 = 1; j0 = 1; tr_a = .false.
912
913 cpassert(.NOT. fm%use_sp)
914
915 IF (PRESENT(start_row)) i0 = start_row
916 IF (PRESENT(start_col)) j0 = start_col
917 IF (PRESENT(transpose)) tr_a = transpose
918 IF (tr_a) THEN
919 nrow = SIZE(target_m, 2)
920 ncol = SIZE(target_m, 1)
921 ELSE
922 nrow = SIZE(target_m, 1)
923 ncol = SIZE(target_m, 2)
924 END IF
925 IF (PRESENT(n_rows)) nrow = n_rows
926 IF (PRESENT(n_cols)) ncol = n_cols
927
928 para_env => fm%matrix_struct%para_env
929
930 full_block => fm%local_data
931#if defined(__parallel)
932 ! zero-out whole target_m
933 IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
934 CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
935 END IF
936#endif
937
938 CALL cp_fm_get_info(matrix=fm, &
939 nrow_global=nrow_global, ncol_global=ncol_global, &
940 nrow_local=nrow_local, ncol_local=ncol_local, &
941 row_indices=row_indices, col_indices=col_indices)
942
943 DO j = 1, ncol_local
944 this_col = col_indices(j) - j0 + 1
945 IF (this_col .GE. 1 .AND. this_col .LE. ncol) THEN
946 IF (tr_a) THEN
947 IF (i0 == 1 .AND. nrow_global == nrow) THEN
948 DO i = 1, nrow_local
949 target_m(this_col, row_indices(i)) = full_block(i, j)
950 END DO
951 ELSE
952 DO i = 1, nrow_local
953 this_row = row_indices(i) - i0 + 1
954 IF (this_row >= 1 .AND. this_row <= nrow) THEN
955 target_m(this_col, this_row) = full_block(i, j)
956 END IF
957 END DO
958 END IF
959 ELSE
960 IF (i0 == 1 .AND. nrow_global == nrow) THEN
961 DO i = 1, nrow_local
962 target_m(row_indices(i), this_col) = full_block(i, j)
963 END DO
964 ELSE
965 DO i = 1, nrow_local
966 this_row = row_indices(i) - i0 + 1
967 IF (this_row >= 1 .AND. this_row <= nrow) THEN
968 target_m(this_row, this_col) = full_block(i, j)
969 END IF
970 END DO
971 END IF
972 END IF
973 END IF
974 END DO
975
976 CALL para_env%sum(target_m)
977
978 CALL timestop(handle)
979
980 END SUBROUTINE cp_fm_get_submatrix
981
982! **************************************************************************************************
983!> \brief returns all kind of information about the full matrix
984!> \param matrix ...
985!> \param name ...
986!> \param nrow_global ...
987!> \param ncol_global ...
988!> \param nrow_block ...
989!> \param ncol_block ...
990!> \param nrow_local ...
991!> \param ncol_local ...
992!> \param row_indices ...
993!> \param col_indices ...
994!> \param local_data ...
995!> \param context ...
996!> \param nrow_locals ...
997!> \param ncol_locals ...
998!> \param matrix_struct ...
999!> \param para_env ...
1000!> \note
1001!> see also cp_fm_struct for explanation
1002!> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1003!> access to the local blacs block
1004! **************************************************************************************************
1005 SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1006 nrow_block, ncol_block, nrow_local, ncol_local, &
1007 row_indices, col_indices, local_data, context, &
1008 nrow_locals, ncol_locals, matrix_struct, para_env)
1009
1010 TYPE(cp_fm_type), INTENT(IN) :: matrix
1011 CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1012 INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1013 ncol_block, nrow_local, ncol_local
1014 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1015 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1016 OPTIONAL, POINTER :: local_data
1017 TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1018 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1019 TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1020 TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1021
1022 IF (PRESENT(name)) name = matrix%name
1023 IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1024 IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1025
1026 CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1027 ncol_local=ncol_local, nrow_global=nrow_global, &
1028 ncol_global=ncol_global, nrow_block=nrow_block, &
1029 ncol_block=ncol_block, row_indices=row_indices, &
1030 col_indices=col_indices, nrow_locals=nrow_locals, &
1031 ncol_locals=ncol_locals, context=context, para_env=para_env)
1032
1033 END SUBROUTINE cp_fm_get_info
1034
1035! **************************************************************************************************
1036!> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1037!> \param matrix a cp_fm_type instance
1038!> \param io_unit the I/O unit to use for writing
1039! **************************************************************************************************
1040 SUBROUTINE cp_fm_write_info(matrix, io_unit)
1041 TYPE(cp_fm_type), INTENT(IN) :: matrix
1042 INTEGER, INTENT(IN) :: io_unit
1043
1044 WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1045 CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1046 END SUBROUTINE cp_fm_write_info
1047
1048! **************************************************************************************************
1049!> \brief find the maximum absolute value of the matrix element
1050!> maxval(abs(matrix))
1051!> \param matrix ...
1052!> \param a_max ...
1053!> \param ir_max ...
1054!> \param ic_max ...
1055! **************************************************************************************************
1056 SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1057 TYPE(cp_fm_type), INTENT(IN) :: matrix
1058 REAL(kind=dp), INTENT(OUT) :: a_max
1059 INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1060
1061 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_maxabsval'
1062
1063 INTEGER :: handle, i, ic_max_local, ir_max_local, &
1064 j, mepos, ncol_local, nrow_local, &
1065 num_pe
1066 INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1067 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1068 REAL(dp) :: my_max
1069 REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1070 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1071 REAL(kind=sp), DIMENSION(:, :), POINTER :: my_block_sp
1072
1073 CALL timeset(routinen, handle)
1074
1075 my_block => matrix%local_data
1076 my_block_sp => matrix%local_data_sp
1077
1078 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1079 row_indices=row_indices, col_indices=col_indices)
1080
1081 IF (matrix%use_sp) THEN
1082 a_max = real(maxval(abs(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
1083 ELSE
1084 a_max = maxval(abs(my_block(1:nrow_local, 1:ncol_local)))
1085 END IF
1086
1087 IF (PRESENT(ir_max)) THEN
1088 num_pe = matrix%matrix_struct%para_env%num_pe
1089 mepos = matrix%matrix_struct%para_env%mepos
1090 ALLOCATE (ir_max_vec(0:num_pe - 1))
1091 ir_max_vec(0:num_pe - 1) = 0
1092 ALLOCATE (ic_max_vec(0:num_pe - 1))
1093 ic_max_vec(0:num_pe - 1) = 0
1094 ALLOCATE (a_max_vec(0:num_pe - 1))
1095 a_max_vec(0:num_pe - 1) = 0.0_dp
1096 my_max = 0.0_dp
1097
1098 IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1099 DO i = 1, ncol_local
1100 DO j = 1, nrow_local
1101 IF (matrix%use_sp) THEN
1102 IF (abs(my_block_sp(j, i)) > my_max) THEN
1103 my_max = real(my_block_sp(j, i), dp)
1104 ir_max_local = j
1105 ic_max_local = i
1106 END IF
1107 ELSE
1108 IF (abs(my_block(j, i)) > my_max) THEN
1109 my_max = my_block(j, i)
1110 ir_max_local = j
1111 ic_max_local = i
1112 END IF
1113 END IF
1114 END DO
1115 END DO
1116
1117 a_max_vec(mepos) = my_max
1118 ir_max_vec(mepos) = row_indices(ir_max_local)
1119 ic_max_vec(mepos) = col_indices(ic_max_local)
1120
1121 END IF
1122
1123 CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1124 CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1125 CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1126
1127 my_max = 0.0_dp
1128 DO i = 0, num_pe - 1
1129 IF (a_max_vec(i) > my_max) THEN
1130 ir_max = ir_max_vec(i)
1131 ic_max = ic_max_vec(i)
1132 END IF
1133 END DO
1134
1135 DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1136 cpassert(ic_max > 0)
1137 cpassert(ir_max > 0)
1138
1139 END IF
1140
1141 CALL matrix%matrix_struct%para_env%max(a_max)
1142
1143 CALL timestop(handle)
1144
1145 END SUBROUTINE cp_fm_maxabsval
1146
1147! **************************************************************************************************
1148!> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1149!> = || A ||_infinity
1150!> \param matrix ...
1151!> \param a_max ...
1152!> \note
1153!> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
1154!> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1155!> http://mathworld.wolfram.com/MatrixNorm.html
1156!> (but the bound is not so tight in the general case)
1157! **************************************************************************************************
1158 SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1159 TYPE(cp_fm_type), INTENT(IN) :: matrix
1160 REAL(kind=dp), INTENT(OUT) :: a_max
1161
1162 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_maxabsrownorm'
1163
1164 INTEGER :: handle, i, j, ncol_local, nrow_global, &
1165 nrow_local
1166 INTEGER, DIMENSION(:), POINTER :: row_indices
1167 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: values
1168 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1169
1170 CALL timeset(routinen, handle)
1171
1172 my_block => matrix%local_data
1173
1174 cpassert(.NOT. matrix%use_sp)
1175
1176 CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1177 nrow_local=nrow_local, ncol_local=ncol_local)
1178
1179 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1180 ALLOCATE (values(nrow_global))
1181 values = 0.0_dp
1182 DO j = 1, ncol_local
1183 DO i = 1, nrow_local
1184 values(row_indices(i)) = values(row_indices(i)) + abs(my_block(i, j))
1185 END DO
1186 END DO
1187 CALL matrix%matrix_struct%para_env%sum(values)
1188 a_max = maxval(values)
1189 DEALLOCATE (values)
1190
1191 CALL timestop(handle)
1192 END SUBROUTINE
1193
1194! **************************************************************************************************
1195!> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
1196!> \param matrix ...
1197!> \param norm_array ...
1198! **************************************************************************************************
1199 SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1200 TYPE(cp_fm_type), INTENT(IN) :: matrix
1201 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1202
1203 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_vectorsnorm'
1204
1205 INTEGER :: handle, i, j, ncol_global, ncol_local, &
1206 nrow_local
1207 INTEGER, DIMENSION(:), POINTER :: col_indices
1208 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1209
1210 CALL timeset(routinen, handle)
1211
1212 my_block => matrix%local_data
1213
1214 cpassert(.NOT. matrix%use_sp)
1215
1216 CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1217 nrow_local=nrow_local, ncol_local=ncol_local)
1218
1219 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1220 norm_array = 0.0_dp
1221 DO j = 1, ncol_local
1222 DO i = 1, nrow_local
1223 norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1224 END DO
1225 END DO
1226 CALL matrix%matrix_struct%para_env%sum(norm_array)
1227 norm_array = sqrt(norm_array)
1228
1229 CALL timestop(handle)
1230 END SUBROUTINE
1231
1232! **************************************************************************************************
1233!> \brief summing up all the elements along the matrix's i-th index
1234!> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1235!> or
1236!> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1237!> \param matrix an input matrix A
1238!> \param sum_array sums of elements in each column/row
1239!> \param dir ...
1240!> \note forked from cp_fm_vectorsnorm() to be used with
1241!> the maximum overlap method
1242!> added row variation
1243! **************************************************************************************************
1244 SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1245 TYPE(cp_fm_type), INTENT(IN) :: matrix
1246 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1247 CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1248
1249 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_vectorssum'
1250
1251 INTEGER :: handle, i, j, ncol_local, nrow_local
1252 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1253 LOGICAL :: docol
1254 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1255
1256 CALL timeset(routinen, handle)
1257
1258 IF (PRESENT(dir)) THEN
1259 IF (dir == 'c' .OR. dir == 'C') THEN
1260 docol = .true.
1261 ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1262 docol = .false.
1263 ELSE
1264 cpabort('Wrong argument DIR')
1265 END IF
1266 ELSE
1267 docol = .true.
1268 END IF
1269
1270 my_block => matrix%local_data
1271
1272 cpassert(.NOT. matrix%use_sp)
1273
1274 CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1275 nrow_local=nrow_local, ncol_local=ncol_local)
1276
1277 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1278 sum_array(:) = 0.0_dp
1279 IF (docol) THEN
1280 DO j = 1, ncol_local
1281 DO i = 1, nrow_local
1282 sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1283 END DO
1284 END DO
1285 ELSE
1286 DO j = 1, ncol_local
1287 DO i = 1, nrow_local
1288 sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1289 END DO
1290 END DO
1291 END IF
1292 CALL matrix%matrix_struct%para_env%sum(sum_array)
1293
1294 CALL timestop(handle)
1295 END SUBROUTINE
1296
1297! **************************************************************************************************
1298!> \brief copy one identically sized matrix in the other
1299!> \param source ...
1300!> \param destination ...
1301!> \note
1302!> see also cp_fm_to_fm_columns
1303! **************************************************************************************************
1304 SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1305
1306 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1307
1308 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_matrix'
1309
1310 INTEGER :: handle, npcol, nprow
1311
1312 CALL timeset(routinen, handle)
1313
1314 nprow = source%matrix_struct%context%num_pe(1)
1315 npcol = source%matrix_struct%context%num_pe(2)
1316
1317 IF ((.NOT. cp2k_is_parallel) .OR. &
1318 cp_fm_struct_equivalent(source%matrix_struct, &
1319 destination%matrix_struct)) THEN
1320 IF (source%use_sp .AND. destination%use_sp) THEN
1321 IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1322 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
1323 CALL cp_abort(__location__, &
1324 "Cannot copy full matrix <"//trim(source%name)// &
1325 "> to full matrix <"//trim(destination%name)// &
1326 ">. The local_data blocks have different sizes.")
1327 CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
1328 source%local_data_sp, 1, destination%local_data_sp, 1)
1329 ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
1330 IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
1331 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
1332 CALL cp_abort(__location__, &
1333 "Cannot copy full matrix <"//trim(source%name)// &
1334 "> to full matrix <"//trim(destination%name)// &
1335 ">. The local_data blocks have different sizes.")
1336 destination%local_data = real(source%local_data_sp, dp)
1337 ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
1338 IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1339 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
1340 CALL cp_abort(__location__, &
1341 "Cannot copy full matrix <"//trim(source%name)// &
1342 "> to full matrix <"//trim(destination%name)// &
1343 ">. The local_data blocks have different sizes.")
1344 destination%local_data_sp = real(source%local_data, sp)
1345 ELSE
1346 IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1347 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1348 CALL cp_abort(__location__, &
1349 "Cannot copy full matrix <"//trim(source%name)// &
1350 "> to full matrix <"//trim(destination%name)// &
1351 ">. The local_data blocks have different sizes.")
1352 CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1353 source%local_data, 1, destination%local_data, 1)
1354 END IF
1355 ELSE
1356 cpabort("Data structures of source and target full matrix are not equivalent")
1357 END IF
1358
1359 CALL timestop(handle)
1360
1361 END SUBROUTINE cp_fm_to_fm_matrix
1362
1363! **************************************************************************************************
1364!> \brief copy just a subset of columns of a fm to a fm
1365!> \param msource ...
1366!> \param mtarget ...
1367!> \param ncol ...
1368!> \param source_start ...
1369!> \param target_start ...
1370! **************************************************************************************************
1371 SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1372 target_start)
1373
1374 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1375 INTEGER, INTENT(IN) :: ncol
1376 INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1377
1378 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
1379
1380 INTEGER :: handle, n, ss, ts
1381 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1382#if defined(__parallel)
1383 INTEGER :: i
1384 INTEGER, DIMENSION(9) :: desca, descb
1385#endif
1386
1387 CALL timeset(routinen, handle)
1388
1389 ss = 1
1390 ts = 1
1391
1392 IF (PRESENT(source_start)) ss = source_start
1393 IF (PRESENT(target_start)) ts = target_start
1394
1395 n = msource%matrix_struct%nrow_global
1396
1397 a => msource%local_data
1398 b => mtarget%local_data
1399
1400#if defined(__parallel)
1401 desca(:) = msource%matrix_struct%descriptor(:)
1402 descb(:) = mtarget%matrix_struct%descriptor(:)
1403 DO i = 0, ncol - 1
1404 CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1405 END DO
1406#else
1407 IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
1408 CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1409 END IF
1410#endif
1411
1412 CALL timestop(handle)
1413
1414 END SUBROUTINE cp_fm_to_fm_columns
1415
1416! **************************************************************************************************
1417!> \brief copy just a triangular matrix
1418!> \param msource ...
1419!> \param mtarget ...
1420!> \param uplo ...
1421! **************************************************************************************************
1422 SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1423
1424 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1425 CHARACTER(LEN=1), OPTIONAL, INTENT(IN) :: uplo
1426
1427 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_triangular'
1428
1429 CHARACTER(LEN=1) :: myuplo
1430 INTEGER :: handle, ncol, nrow
1431 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1432#if defined(__parallel)
1433 INTEGER, DIMENSION(9) :: desca, descb
1434#endif
1435
1436 CALL timeset(routinen, handle)
1437
1438 myuplo = 'U'
1439 IF (PRESENT(uplo)) myuplo = uplo
1440
1441 nrow = msource%matrix_struct%nrow_global
1442 ncol = msource%matrix_struct%ncol_global
1443
1444 a => msource%local_data
1445 b => mtarget%local_data
1446
1447#if defined(__parallel)
1448 desca(:) = msource%matrix_struct%descriptor(:)
1449 descb(:) = mtarget%matrix_struct%descriptor(:)
1450 CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1451#else
1452 CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1453#endif
1454
1455 CALL timestop(handle)
1456
1457 END SUBROUTINE cp_fm_to_fm_triangular
1458
1459! **************************************************************************************************
1460!> \brief copy just a part ot the matrix
1461!> \param msource ...
1462!> \param mtarget ...
1463!> \param nrow ...
1464!> \param ncol ...
1465!> \param s_firstrow ...
1466!> \param s_firstcol ...
1467!> \param t_firstrow ...
1468!> \param t_firstcol ...
1469! **************************************************************************************************
1470
1471 SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1472
1473 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1474 INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1475 s_firstcol, t_firstrow, &
1476 t_firstcol
1477
1478 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat'
1479
1480 INTEGER :: handle, i, na, nb, ss, ts
1481 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1482#if defined(__parallel)
1483 INTEGER, DIMENSION(9) :: desca, descb
1484#endif
1485
1486 CALL timeset(routinen, handle)
1487
1488 a => msource%local_data
1489 b => mtarget%local_data
1490
1491 na = msource%matrix_struct%nrow_global
1492 nb = mtarget%matrix_struct%nrow_global
1493! nrow must be <= na and nb
1494 IF (nrow > na) &
1495 cpabort("cannot copy because nrow > number of rows of source matrix")
1496 IF (nrow > nb) &
1497 cpabort("cannot copy because nrow > number of rows of target matrix")
1498 na = msource%matrix_struct%ncol_global
1499 nb = mtarget%matrix_struct%ncol_global
1500! ncol must be <= na_col and nb_col
1501 IF (ncol > na) &
1502 cpabort("cannot copy because nrow > number of rows of source matrix")
1503 IF (ncol > nb) &
1504 cpabort("cannot copy because nrow > number of rows of target matrix")
1505
1506#if defined(__parallel)
1507 desca(:) = msource%matrix_struct%descriptor(:)
1508 descb(:) = mtarget%matrix_struct%descriptor(:)
1509 DO i = 0, ncol - 1
1510 ss = s_firstcol + i
1511 ts = t_firstcol + i
1512 CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1513 END DO
1514#else
1515 DO i = 0, ncol - 1
1516 ss = s_firstcol + i
1517 ts = t_firstcol + i
1518 CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1519 END DO
1520#endif
1521
1522 CALL timestop(handle)
1523 END SUBROUTINE cp_fm_to_fm_submat
1524
1525! **************************************************************************************************
1526!> \brief General copy of a fm matrix to another fm matrix.
1527!> Uses non-blocking MPI rather than ScaLAPACK.
1528!>
1529!> \param source input fm matrix
1530!> \param destination output fm matrix
1531!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1532!> of the input and output matrices
1533!> \par History
1534!> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1535! **************************************************************************************************
1536 SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1537 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1538 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1539
1540 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_copy_general'
1541
1542 INTEGER :: handle
1543 TYPE(copy_info_type) :: info
1544
1545 CALL timeset(routinen, handle)
1546
1547 CALL cp_fm_start_copy_general(source, destination, para_env, info)
1548 IF (ASSOCIATED(destination%matrix_struct)) THEN
1549 CALL cp_fm_finish_copy_general(destination, info)
1550 END IF
1551 IF (ASSOCIATED(source%matrix_struct)) THEN
1553 END IF
1554
1555 CALL timestop(handle)
1556 END SUBROUTINE cp_fm_copy_general
1557
1558! **************************************************************************************************
1559!> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
1560!> \param source input fm matrix
1561!> \param destination output fm matrix
1562!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1563!> of the input and output matrices
1564!> \param info all of the data that will be needed to complete the copy operation
1565! **************************************************************************************************
1566 SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1567 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1568 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1569 TYPE(copy_info_type), INTENT(OUT) :: info
1570
1571 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_start_copy_general'
1572
1573 INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1574 ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1575 nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1576 recv_rank, recv_size, send_rank, send_size
1577 INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1578 recv_count, send_count, send_disp, &
1579 source2global, src_p, src_q
1580 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1581 INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1582 src_block, src_block_tmp, src_num_pe
1583 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1584 send_col_indices, send_row_indices
1585 TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1586 TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1587
1588 CALL timeset(routinen, handle)
1589
1590 IF (.NOT. cp2k_is_parallel) THEN
1591 ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1592 nrow_local_send = SIZE(source%local_data, 1)
1593 ncol_local_send = SIZE(source%local_data, 2)
1594 ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1595 k = 0
1596 DO j = 1, ncol_local_send
1597 DO i = 1, nrow_local_send
1598 k = k + 1
1599 info%send_buf(k) = source%local_data(i, j)
1600 END DO
1601 END DO
1602 ELSE
1603 ! assumption of double precision data is carried forward from ScaLAPACK version
1604 IF (source%use_sp) cpabort("only DP kind implemented")
1605 IF (destination%use_sp) cpabort("only DP kind implemented")
1606
1607 NULLIFY (recv_dist, send_dist)
1608 NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1609
1610 ! The 'global' communicator contains both the source and destination decompositions
1611 global_size = para_env%num_pe
1612 global_rank = para_env%mepos
1613
1614 ! The source/send decomposition and destination/recv decompositions may only exist on
1615 ! on a subset of the processes involved in the communication
1616 ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1617 ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1618 IF (ASSOCIATED(destination%matrix_struct)) THEN
1619 recv_dist => destination%matrix_struct
1620 recv_rank = recv_dist%para_env%mepos
1621 ELSE
1622 recv_rank = mp_proc_null
1623 END IF
1624
1625 IF (ASSOCIATED(source%matrix_struct)) THEN
1626 send_dist => source%matrix_struct
1627 send_rank = send_dist%para_env%mepos
1628 ELSE
1629 send_rank = mp_proc_null
1630 END IF
1631
1632 ! Map the rank in the source/dest communicator to the global rank
1633 ALLOCATE (all_ranks(0:global_size - 1))
1634
1635 CALL para_env%allgather(send_rank, all_ranks)
1636 IF (ASSOCIATED(recv_dist)) THEN
1637 ALLOCATE (source2global(0:count(all_ranks .NE. mp_proc_null) - 1))
1638 DO i = 0, global_size - 1
1639 IF (all_ranks(i) .NE. mp_proc_null) THEN
1640 source2global(all_ranks(i)) = i
1641 END IF
1642 END DO
1643 END IF
1644
1645 CALL para_env%allgather(recv_rank, all_ranks)
1646 IF (ASSOCIATED(send_dist)) THEN
1647 ALLOCATE (dest2global(0:count(all_ranks .NE. mp_proc_null) - 1))
1648 DO i = 0, global_size - 1
1649 IF (all_ranks(i) .NE. mp_proc_null) THEN
1650 dest2global(all_ranks(i)) = i
1651 END IF
1652 END DO
1653 END IF
1654 DEALLOCATE (all_ranks)
1655
1656 ! Some data from the two decompositions will be needed by all processes in the global group :
1657 ! process grid shape, block size, and the BLACS-to-MPI mapping
1658
1659 ! The global root process will receive the data (from the root process in each decomposition)
1660 send_req(:) = mp_request_null
1661 IF (global_rank == 0) THEN
1662 recv_req(:) = mp_request_null
1663 CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1664 CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1665 CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1666 CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1667 END IF
1668
1669 IF (ASSOCIATED(send_dist)) THEN
1670 IF ((send_rank .EQ. 0)) THEN
1671 ! need to use separate buffers here in case this is actually global rank 0
1672 src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
1673 CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1674 CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1675 END IF
1676 END IF
1677
1678 IF (ASSOCIATED(recv_dist)) THEN
1679 IF ((recv_rank .EQ. 0)) THEN
1680 dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
1681 CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1682 CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1683 END IF
1684 END IF
1685
1686 IF (global_rank == 0) THEN
1687 CALL mp_waitall(recv_req(1:4))
1688 ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1689 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1690 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1691 )
1692 CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1693 CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1694 END IF
1695
1696 IF (ASSOCIATED(send_dist)) THEN
1697 IF ((send_rank .EQ. 0)) THEN
1698 CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1699 END IF
1700 END IF
1701
1702 IF (ASSOCIATED(recv_dist)) THEN
1703 IF ((recv_rank .EQ. 0)) THEN
1704 CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1705 END IF
1706 END IF
1707
1708 IF (global_rank == 0) THEN
1709 CALL mp_waitall(recv_req(5:6))
1710 END IF
1711
1712 ! Finally, broadcast the data to all processes in the global communicator
1713 CALL para_env%bcast(src_block, 0)
1714 CALL para_env%bcast(dest_block, 0)
1715 CALL para_env%bcast(src_num_pe, 0)
1716 CALL para_env%bcast(dest_num_pe, 0)
1717 info%src_num_pe(1:2) = src_num_pe(1:2)
1718 info%nblock_src(1:2) = src_block(1:2)
1719 IF (global_rank .NE. 0) THEN
1720 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1721 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1722 )
1723 END IF
1724 CALL para_env%bcast(info%src_blacs2mpi, 0)
1725 CALL para_env%bcast(dest_blacs2mpi, 0)
1726
1727 recv_size = dest_num_pe(1)*dest_num_pe(2)
1728 send_size = src_num_pe(1)*src_num_pe(2)
1729 info%send_size = send_size
1730 CALL mp_waitall(send_req(:))
1731
1732 ! Setup is now complete, we can start the actual communication here.
1733 ! The order implemented here is:
1734 ! DEST_1
1735 ! compute recv sizes
1736 ! call irecv
1737 ! SRC_1
1738 ! compute send sizes
1739 ! pack send buffers
1740 ! call isend
1741 ! DEST_2
1742 ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1743 ! SRC_2
1744 ! wait for the sends
1745
1746 ! DEST_1
1747 IF (ASSOCIATED(recv_dist)) THEN
1748 CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1749 col_indices=recv_col_indices &
1750 )
1751 info%recv_col_indices => recv_col_indices
1752 info%recv_row_indices => recv_row_indices
1753 nrow_block_src = src_block(1)
1754 ncol_block_src = src_block(2)
1755 ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1756
1757 ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1758 nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1759 ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1760 info%nlocal_recv(1) = nrow_local_recv
1761 info%nlocal_recv(2) = ncol_local_recv
1762 ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1763 ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1764 DO i = 1, nrow_local_recv
1765 ! For each local row we will receive, we look up its global row (in recv_row_indices),
1766 ! then work out which row block it comes from, and which process row that row block comes from.
1767 src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1768 END DO
1769 DO j = 1, ncol_local_recv
1770 ! Similarly for the columns
1771 src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1772 END DO
1773 ! src_p/q now contains the process row/column ID that will send data to that row/column
1774
1775 DO q = 0, src_num_pe(2) - 1
1776 ncols = count(src_q .EQ. q)
1777 DO p = 0, src_num_pe(1) - 1
1778 nrows = count(src_p .EQ. p)
1779 ! Use the send_dist here as we are looking up the processes where the data comes from
1780 recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1781 END DO
1782 END DO
1783 DEALLOCATE (src_p, src_q)
1784
1785 ! Use one long buffer (and displacements into that buffer)
1786 ! this prevents the need for a rectangular array where not all elements will be populated
1787 ALLOCATE (info%recv_buf(sum(recv_count(:))))
1788 info%recv_disp(0) = 0
1789 DO i = 1, send_size - 1
1790 info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1791 END DO
1792
1793 ! Issue receive calls on ranks which expect data
1794 DO k = 0, send_size - 1
1795 IF (recv_count(k) .GT. 0) THEN
1796 CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1797 source2global(k), info%recv_request(k))
1798 END IF
1799 END DO
1800 DEALLOCATE (source2global)
1801 END IF ! ASSOCIATED(recv_dist)
1802
1803 ! SRC_1
1804 IF (ASSOCIATED(send_dist)) THEN
1805 CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1806 col_indices=send_col_indices &
1807 )
1808 nrow_block_dest = dest_block(1)
1809 ncol_block_dest = dest_block(2)
1810 ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1811
1812 ! Determine the send counts, allocate the send buffers
1813 nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1814 ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1815
1816 ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1817 ! i.e. number of rows,cols in the sending distribution
1818 ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1819
1820 DO i = 1, nrow_local_send
1821 ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1822 dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1823 END DO
1824 DO j = 1, ncol_local_send
1825 dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1826 END DO
1827 ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1828
1829 DO q = 0, dest_num_pe(2) - 1
1830 ncols = count(dest_q .EQ. q)
1831 DO p = 0, dest_num_pe(1) - 1
1832 nrows = count(dest_p .EQ. p)
1833 send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1834 END DO
1835 END DO
1836 DEALLOCATE (dest_p, dest_q)
1837
1838 ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1839 ALLOCATE (info%send_buf(sum(send_count(:))))
1840 send_disp(0) = 0
1841 DO k = 1, recv_size - 1
1842 send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1843 END DO
1844
1845 ! Loop over the smat, pack the send buffers
1846 send_count(:) = 0
1847 DO j = 1, ncol_local_send
1848 ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1849 dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1850 DO i = 1, nrow_local_send
1851 dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1852 mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1853 send_count(mpi_rank) = send_count(mpi_rank) + 1
1854 info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1855 END DO
1856 END DO
1857
1858 ! For each non-zero send_count, call mpi_isend
1859 DO k = 0, recv_size - 1
1860 IF (send_count(k) .GT. 0) THEN
1861 CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1862 dest2global(k), info%send_request(k))
1863 END IF
1864 END DO
1865 DEALLOCATE (send_count, send_disp, dest2global)
1866 END IF ! ASSOCIATED(send_dist)
1867 DEALLOCATE (dest_blacs2mpi)
1868
1869 END IF !IF (.NOT. cp2k_is_parallel)
1870
1871 CALL timestop(handle)
1872
1873 END SUBROUTINE cp_fm_start_copy_general
1874
1875! **************************************************************************************************
1876!> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1877!> \param destination output fm matrix
1878!> \param info all of the data that will be needed to complete the copy operation
1879! **************************************************************************************************
1880 SUBROUTINE cp_fm_finish_copy_general(destination, info)
1881 TYPE(cp_fm_type), INTENT(IN) :: destination
1882 TYPE(copy_info_type), INTENT(INOUT) :: info
1883
1884 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_finish_copy_general'
1885
1886 INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1887 src_p_i, src_q_j
1888 INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1889 INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1890 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1891
1892 CALL timeset(routinen, handle)
1893
1894 IF (.NOT. cp2k_is_parallel) THEN
1895 ! Now unpack the data from the 'send buffer'
1896 k = 0
1897 DO j = 1, SIZE(destination%local_data, 2)
1898 DO i = 1, SIZE(destination%local_data, 1)
1899 k = k + 1
1900 destination%local_data(i, j) = info%send_buf(k)
1901 END DO
1902 END DO
1903 DEALLOCATE (info%send_buf)
1904 ELSE
1905 ! Set up local variables ...
1906 send_size = info%send_size
1907 nlocal_recv(1:2) = info%nlocal_recv(:)
1908 nblock_src(1:2) = info%nblock_src(:)
1909 src_num_pe(1:2) = info%src_num_pe(:)
1910 recv_col_indices => info%recv_col_indices
1911 recv_row_indices => info%recv_row_indices
1912
1913 ! ... use the local variables to do the work
1914 ! DEST_2
1915 CALL mp_waitall(info%recv_request(:))
1916 ALLOCATE (recv_count(0:send_size - 1))
1917 ! Loop over the rmat, filling it in with data from the recv buffers
1918 ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1919 recv_count(:) = 0
1920 DO j = 1, nlocal_recv(2)
1921 src_q_j = mod(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
1922 DO i = 1, nlocal_recv(1)
1923 src_p_i = mod(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
1924 mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
1925 recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1926 destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1927 END DO
1928 END DO
1929 DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1930 ! Invalidate the stored state
1931 NULLIFY (info%recv_col_indices, &
1932 info%recv_row_indices)
1933
1934 END IF
1935
1936 CALL timestop(handle)
1937
1938 END SUBROUTINE cp_fm_finish_copy_general
1939
1940! **************************************************************************************************
1941!> \brief Completes the copy operation: wait for comms clean up MPI state
1942!> \param info all of the data that will be needed to complete the copy operation
1943! **************************************************************************************************
1945 TYPE(copy_info_type), INTENT(INOUT) :: info
1946
1947 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_cleanup_copy_general'
1948
1949 INTEGER :: handle
1950
1951 CALL timeset(routinen, handle)
1952
1953 IF (.NOT. cp2k_is_parallel) THEN
1954 ! Don't do anything - no MPI state for the serial case
1955 ELSE
1956 ! SRC_2
1957 ! If this process is also in the destination decomposition, this deallocate
1958 ! Was already done in cp_fm_finish_copy_general
1959 IF (ALLOCATED(info%src_blacs2mpi)) THEN
1960 DEALLOCATE (info%src_blacs2mpi)
1961 END IF
1962 CALL mp_waitall(info%send_request)
1963 DEALLOCATE (info%send_request, info%send_buf)
1964
1965 END IF
1966
1967 CALL timestop(handle)
1968
1969 END SUBROUTINE cp_fm_cleanup_copy_general
1970
1971! **************************************************************************************************
1972!> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
1973!> The two matrices can have different contexts.
1974!>
1975!> Summary of distribution routines for dense matrices
1976!> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
1977!>
1978!> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
1979!>
1980!> A process that is not a part of the context of A should set descA(2)
1981!> to -1, and similarly for B.
1982!>
1983!> \param source input fm matrix
1984!> \param destination output fm matrix
1985!> \param nrows number of rows of sub matrix to be copied
1986!> \param ncols number of cols of sub matrix to be copied
1987!> \param s_firstrow starting global row index of sub matrix in source
1988!> \param s_firstcol starting global col index of sub matrix in source
1989!> \param d_firstrow starting global row index of sub matrix in destination
1990!> \param d_firstcol starting global col index of sub matrix in destination
1991!> \param global_context process grid that covers all parts of either A or B.
1992! **************************************************************************************************
1993 SUBROUTINE cp_fm_to_fm_submat_general(source, &
1994 destination, &
1995 nrows, &
1996 ncols, &
1997 s_firstrow, &
1998 s_firstcol, &
1999 d_firstrow, &
2000 d_firstcol, &
2001 global_context)
2002
2003 TYPE(cp_fm_type), INTENT(IN) :: source, destination
2004 INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
2005 d_firstrow, d_firstcol
2006
2007 CLASS(cp_blacs_type), INTENT(IN) :: global_context
2008
2009 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat_general'
2010
2011 LOGICAL :: debug
2012 INTEGER :: handle
2013#if defined(__parallel)
2014 INTEGER, DIMENSION(9) :: desca, descb
2015 REAL(kind=dp), DIMENSION(1, 1), TARGET :: dummy
2016 REAL(kind=dp), DIMENSION(:, :), POINTER :: smat, dmat
2017#endif
2018
2019 CALL timeset(routinen, handle)
2020
2021 debug = debug_this_module
2022
2023 IF (.NOT. cp2k_is_parallel) THEN
2024 CALL cp_fm_to_fm_submat(source, &
2025 destination, &
2026 nrows, &
2027 ncols, &
2028 s_firstrow, &
2029 s_firstcol, &
2030 d_firstrow, &
2031 d_firstcol)
2032 ELSE
2033#ifdef __parallel
2034 NULLIFY (smat, dmat)
2035 ! check whether source is available on this process
2036 IF (ASSOCIATED(source%matrix_struct)) THEN
2037 desca = source%matrix_struct%descriptor
2038 IF (source%use_sp) cpabort("only DP kind implemented")
2039 IF (nrows .GT. source%matrix_struct%nrow_global) &
2040 cpabort("nrows is greater than nrow_global of source")
2041 IF (ncols .GT. source%matrix_struct%ncol_global) &
2042 cpabort("ncols is greater than ncol_global of source")
2043 smat => source%local_data
2044 ELSE
2045 desca = -1
2046 smat => dummy
2047 END IF
2048 ! check destination is available on this process
2049 IF (ASSOCIATED(destination%matrix_struct)) THEN
2050 descb = destination%matrix_struct%descriptor
2051 IF (destination%use_sp) cpabort("only DP kind implemented")
2052 IF (nrows .GT. destination%matrix_struct%nrow_global) &
2053 cpabort("nrows is greater than nrow_global of destination")
2054 IF (ncols .GT. destination%matrix_struct%ncol_global) &
2055 cpabort("ncols is greater than ncol_global of destination")
2056 dmat => destination%local_data
2057 ELSE
2058 descb = -1
2059 dmat => dummy
2060 END IF
2061 ! do copy
2062
2063 CALL pdgemr2d(nrows, &
2064 ncols, &
2065 smat, &
2066 s_firstrow, &
2067 s_firstcol, &
2068 desca, &
2069 dmat, &
2070 d_firstrow, &
2071 d_firstcol, &
2072 descb, &
2073 global_context%get_handle())
2074#else
2075 mark_used(global_context)
2076 cpabort("this subroutine only supports SCALAPACK")
2077#endif
2078 END IF
2079
2080 CALL timestop(handle)
2081
2082 END SUBROUTINE cp_fm_to_fm_submat_general
2083
2084! **************************************************************************************************
2085!> \brief ...
2086!> \param matrix ...
2087!> \param irow_global ...
2088!> \param icol_global ...
2089!> \param alpha ...
2090! **************************************************************************************************
2091 SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2092
2093 ! Add alpha to the matrix element specified by the global indices
2094 ! irow_global and icol_global
2095
2096 ! - Creation (05.05.06,MK)
2097
2098 TYPE(cp_fm_type), INTENT(IN) :: matrix
2099 INTEGER, INTENT(IN) :: irow_global, icol_global
2100 REAL(kind=dp), INTENT(IN) :: alpha
2101
2102 INTEGER :: mypcol, myprow, npcol, nprow
2103 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2104 TYPE(cp_blacs_env_type), POINTER :: context
2105#if defined(__parallel)
2106 INTEGER :: icol_local, ipcol, iprow, &
2107 irow_local
2108 INTEGER, DIMENSION(9) :: desca
2109#endif
2110
2111 context => matrix%matrix_struct%context
2112
2113 myprow = context%mepos(1)
2114 mypcol = context%mepos(2)
2115
2116 nprow = context%num_pe(1)
2117 npcol = context%num_pe(2)
2118
2119 a => matrix%local_data
2120
2121#if defined(__parallel)
2122
2123 desca(:) = matrix%matrix_struct%descriptor(:)
2124
2125 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2126 irow_local, icol_local, iprow, ipcol)
2127
2128 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2129 a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2130 END IF
2131
2132#else
2133
2134 a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2135
2136#endif
2137
2138 END SUBROUTINE cp_fm_add_to_element
2139
2140! **************************************************************************************************
2141!> \brief ...
2142!> \param fm ...
2143!> \param unit ...
2144! **************************************************************************************************
2145 SUBROUTINE cp_fm_write_unformatted(fm, unit)
2146 TYPE(cp_fm_type), INTENT(IN) :: fm
2147 INTEGER, INTENT(IN) :: unit
2148
2149 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_write_unformatted'
2150
2151 INTEGER :: handle, j, max_block, &
2152 ncol_global, nrow_global
2153 TYPE(mp_para_env_type), POINTER :: para_env
2154#if defined(__parallel)
2155 INTEGER :: i, i_block, icol_local, &
2156 in, info, ipcol, &
2157 iprow, irow_local, &
2158 mepos, &
2159 num_pe, rb, tag
2160 INTEGER, DIMENSION(9) :: desc
2161 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2162 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2163 TYPE(cp_blacs_type) :: ictxt_loc
2164 INTEGER, EXTERNAL :: numroc
2165#endif
2166
2167 CALL timeset(routinen, handle)
2168 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2169 para_env=para_env)
2170
2171#if defined(__parallel)
2172 num_pe = para_env%num_pe
2173 mepos = para_env%mepos
2174 rb = nrow_global
2175 tag = 0
2176 ! get a new context
2177 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2178 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2179 cpassert(info == 0)
2180 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2181 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2182 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2183
2184 ALLOCATE (newdat(nrow_global, max(1, in)))
2185
2186 ! do the actual scalapack to cols reordering
2187 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2188 fm%matrix_struct%descriptor, &
2189 newdat, 1, 1, desc, ictxt_loc%get_handle())
2190
2191 ALLOCATE (vecbuf(nrow_global*max_block))
2192 vecbuf = huge(1.0_dp) ! init for valgrind
2193
2194 DO i = 1, ncol_global, max(max_block, 1)
2195 i_block = min(max_block, ncol_global - i + 1)
2196 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2197 irow_local, icol_local, iprow, ipcol)
2198 IF (ipcol == mypcol) THEN
2199 DO j = 1, i_block
2200 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2201 END DO
2202 END IF
2203
2204 IF (ipcol == 0) THEN
2205 ! do nothing
2206 ELSE
2207 IF (ipcol == mypcol) THEN
2208 CALL para_env%send(vecbuf(:), 0, tag)
2209 END IF
2210 IF (mypcol == 0) THEN
2211 CALL para_env%recv(vecbuf(:), ipcol, tag)
2212 END IF
2213 END IF
2214
2215 IF (unit > 0) THEN
2216 DO j = 1, i_block
2217 WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2218 END DO
2219 END IF
2220
2221 END DO
2222 END associate
2223 DEALLOCATE (vecbuf)
2224
2225 CALL ictxt_loc%gridexit()
2226
2227 DEALLOCATE (newdat)
2228
2229#else
2230
2231 IF (unit > 0) THEN
2232 DO j = 1, ncol_global
2233 WRITE (unit) fm%local_data(:, j)
2234 END DO
2235 END IF
2236
2237#endif
2238 CALL timestop(handle)
2239
2240 END SUBROUTINE cp_fm_write_unformatted
2241
2242! **************************************************************************************************
2243!> \brief Write out a full matrix in plain text.
2244!> \param fm the matrix to be outputted
2245!> \param unit the unit number for I/O
2246!> \param header optional header
2247!> \param value_format ...
2248! **************************************************************************************************
2249 SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2250 TYPE(cp_fm_type), INTENT(IN) :: fm
2251 INTEGER, INTENT(IN) :: unit
2252 CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2253
2254 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_write_formatted'
2255
2256 CHARACTER(LEN=21) :: my_value_format
2257 INTEGER :: handle, i, j, max_block, &
2258 ncol_global, nrow_global
2259 TYPE(mp_para_env_type), POINTER :: para_env
2260#if defined(__parallel)
2261 INTEGER :: i_block, icol_local, &
2262 in, info, ipcol, &
2263 iprow, irow_local, &
2264 mepos, num_pe, rb, tag, k, &
2265 icol, irow
2266 INTEGER, DIMENSION(9) :: desc
2267 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2268 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2269 TYPE(cp_blacs_type) :: ictxt_loc
2270 INTEGER, EXTERNAL :: numroc
2271#endif
2272
2273 CALL timeset(routinen, handle)
2274 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2275 para_env=para_env)
2276
2277 IF (PRESENT(value_format)) THEN
2278 cpassert(len_trim(adjustl(value_format)) < 11)
2279 my_value_format = "(I10, I10, "//trim(adjustl(value_format))//")"
2280 ELSE
2281 my_value_format = "(I10, I10, ES24.12)"
2282 END IF
2283
2284 IF (unit > 0) THEN
2285 IF (PRESENT(header)) WRITE (unit, *) header
2286 WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", adjustl("Value")
2287 END IF
2288
2289#if defined(__parallel)
2290 num_pe = para_env%num_pe
2291 mepos = para_env%mepos
2292 rb = nrow_global
2293 tag = 0
2294 ! get a new context
2295 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2296 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2297 cpassert(info == 0)
2298 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2299 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2300 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2301
2302 ALLOCATE (newdat(nrow_global, max(1, in)))
2303
2304 ! do the actual scalapack to cols reordering
2305 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2306 fm%matrix_struct%descriptor, &
2307 newdat, 1, 1, desc, ictxt_loc%get_handle())
2308
2309 ALLOCATE (vecbuf(nrow_global*max_block))
2310 vecbuf = huge(1.0_dp) ! init for valgrind
2311 irow = 1
2312 icol = 1
2313
2314 DO i = 1, ncol_global, max(max_block, 1)
2315 i_block = min(max_block, ncol_global - i + 1)
2316 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2317 irow_local, icol_local, iprow, ipcol)
2318 IF (ipcol == mypcol) THEN
2319 DO j = 1, i_block
2320 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2321 END DO
2322 END IF
2323
2324 IF (ipcol == 0) THEN
2325 ! do nothing
2326 ELSE
2327 IF (ipcol == mypcol) THEN
2328 CALL para_env%send(vecbuf(:), 0, tag)
2329 END IF
2330 IF (mypcol == 0) THEN
2331 CALL para_env%recv(vecbuf(:), ipcol, tag)
2332 END IF
2333 END IF
2334
2335 IF (unit > 0) THEN
2336 DO j = 1, i_block
2337 DO k = (j - 1)*nrow_global + 1, nrow_global*j
2338 WRITE (unit=unit, fmt=my_value_format) irow, icol, vecbuf(k)
2339 irow = irow + 1
2340 IF (irow > nrow_global) THEN
2341 irow = 1
2342 icol = icol + 1
2343 END IF
2344 END DO
2345 END DO
2346 END IF
2347
2348 END DO
2349 END associate
2350 DEALLOCATE (vecbuf)
2351
2352 CALL ictxt_loc%gridexit()
2353
2354 DEALLOCATE (newdat)
2355
2356#else
2357
2358 IF (unit > 0) THEN
2359 DO j = 1, ncol_global
2360 DO i = 1, nrow_global
2361 WRITE (unit=unit, fmt=my_value_format) i, j, fm%local_data(i, j)
2362 END DO
2363 END DO
2364 END IF
2365
2366#endif
2367 CALL timestop(handle)
2368
2369 END SUBROUTINE cp_fm_write_formatted
2370
2371! **************************************************************************************************
2372!> \brief ...
2373!> \param fm ...
2374!> \param unit ...
2375! **************************************************************************************************
2376 SUBROUTINE cp_fm_read_unformatted(fm, unit)
2377 TYPE(cp_fm_type), INTENT(INOUT) :: fm
2378 INTEGER, INTENT(IN) :: unit
2379
2380 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_read_unformatted'
2381
2382 INTEGER :: handle, j, max_block, &
2383 ncol_global, nrow_global
2384 TYPE(mp_para_env_type), POINTER :: para_env
2385#if defined(__parallel)
2386 INTEGER :: k, n_cols
2387 REAL(kind=dp), DIMENSION(:, :), POINTER :: vecbuf
2388#endif
2389
2390 CALL timeset(routinen, handle)
2391
2392 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2393 para_env=para_env)
2394
2395#if defined(__parallel)
2396
2397 ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2398
2399 ALLOCATE (vecbuf(nrow_global, max_block))
2400
2401 DO j = 1, ncol_global, max_block
2402
2403 n_cols = min(max_block, ncol_global - j + 1)
2404 IF (para_env%mepos == 0) THEN
2405 DO k = 1, n_cols
2406 READ (unit) vecbuf(:, k)
2407 END DO
2408 END IF
2409 CALL para_env%bcast(vecbuf, 0)
2410 CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2411
2412 END DO
2413
2414 DEALLOCATE (vecbuf)
2415
2416#else
2417
2418 DO j = 1, ncol_global
2419 READ (unit) fm%local_data(:, j)
2420 END DO
2421
2422#endif
2423
2424 CALL timestop(handle)
2425
2426 END SUBROUTINE cp_fm_read_unformatted
2427
2428! **************************************************************************************************
2429!> \brief ...
2430!> \param mm_type ...
2431! **************************************************************************************************
2432 SUBROUTINE cp_fm_setup(mm_type)
2433 INTEGER, INTENT(IN) :: mm_type
2434
2435 cp_fm_mm_type = mm_type
2436 END SUBROUTINE cp_fm_setup
2437
2438! **************************************************************************************************
2439!> \brief ...
2440!> \return ...
2441! **************************************************************************************************
2442 FUNCTION cp_fm_get_mm_type() RESULT(res)
2443 INTEGER :: res
2444
2445 res = cp_fm_mm_type
2446 END FUNCTION
2447
2448! **************************************************************************************************
2449!> \brief ...
2450!> \param ictxt ...
2451!> \param prec ...
2452!> \return ...
2453! **************************************************************************************************
2454 FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2455 INTEGER :: ictxt
2456 CHARACTER(LEN=1) :: prec
2457 INTEGER :: res
2458#if defined(__parallel)
2459 INTEGER :: pilaenv
2460 res = pilaenv(ictxt, prec)
2461#else
2462 mark_used(ictxt)
2463 mark_used(prec)
2464 res = -1
2465#endif
2466
2467 END FUNCTION
2468
2469END MODULE cp_fm_types
methods related to the blacs parallel environment
wrappers for the actual blacs calls. all functionality needed in the code should actually be provide ...
represent the structure of a full matrix
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
subroutine, public cp_fm_struct_retain(fmstruct)
retains a full matrix structure
subroutine, public cp_fm_struct_write_info(fmstruct, io_unit)
Write nicely formatted info about the FM struct to the given I/O unit.
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_copy_general(source, destination, para_env)
General copy of a fm matrix to another fm matrix. Uses non-blocking MPI rather than ScaLAPACK.
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_start_copy_general(source, destination, para_env, info)
Initiates the copy operation: get distribution data, post MPI isend and irecvs.
subroutine, public cp_fm_cleanup_copy_general(info)
Completes the copy operation: wait for comms clean up MPI state.
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
integer function, public cp_fm_get_mm_type()
...
subroutine, public cp_fm_vectorssum(matrix, sum_array, dir)
summing up all the elements along the matrix's i-th index or
subroutine, public cp_fm_write_unformatted(fm, unit)
...
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
...
subroutine, public cp_fm_setup(mm_type)
...
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_read_unformatted(fm, unit)
...
subroutine, public cp_fm_vectorsnorm(matrix, norm_array)
find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat_general(source, destination, nrows, ncols, s_firstrow, s_firstcol, d_firstrow, d_firstcol, global_context)
General copy of a submatrix of fm matrix to a submatrix of another fm matrix. The two matrices can ha...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_finish_copy_general(destination, info)
Completes the copy operation: wait for comms, unpack, clean up MPI state.
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_to_fm_triangular(msource, mtarget, uplo)
copy just a triangular matrix
integer function, public cp_fm_pilaenv(ictxt, prec)
...
subroutine, public cp_fm_create(matrix, matrix_struct, name, use_sp)
creates a new full matrix with the given structure
subroutine, public cp_fm_write_formatted(fm, unit, header, value_format)
Write out a full matrix in plain text.
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public sp
Definition kinds.F:33
Interface to the message passing library MPI.
integer, parameter, public mp_proc_null
logical, parameter, public cp2k_is_parallel
integer, parameter, public mp_any_source
type(mp_request_type), parameter, public mp_request_null
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
integer, parameter, public uniform
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
Stores the state of a copy between cp_fm_start_copy_general and cp_fm_finish_copy_general.
just to build arrays of pointers to matrices
represent a full matrix
stores all the informations relevant to an mpi environment