(git:0d88fc2)
Loading...
Searching...
No Matches
cp_fm_types.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief represent a full matrix distributed on many processors
10!> \par History
11!> 3) separated structure object, removed globenv, renamed to full matrix
12!> many changes (fawzi 08.2002)
13!> \author Matthias Krack (22.05.2001)
14! **************************************************************************************************
25 USE kinds, ONLY: dp
33 USE parallel_rng_types, ONLY: uniform,&
35#include "../base/base_uses.f90"
36
37 IMPLICIT NONE
38
39 PRIVATE
40
41 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
42 LOGICAL, PARAMETER :: debug_this_module = .true.
43 INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
44
45 INTEGER, PRIVATE :: cp_fm_mm_type = 1
46
47 PUBLIC :: cp_fm_type, &
49
50 PUBLIC :: cp_fm_add_to_element, &
56 cp_fm_get_diag, & ! get diagonal
57 cp_fm_set_all, & ! set all elements and diagonal
58 cp_fm_set_all_submatrix, & ! set a submatrix to a given value
59 cp_fm_set_submatrix, & ! set a submatrix to given values
60 cp_fm_get_submatrix, & ! get a submatrix of given values
62 cp_fm_maxabsval, & ! find the maximum absolute value
63 cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
64 cp_fm_to_fm, & ! copy (parts of) a fm to a fm
65 cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
66 cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
67 cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
73 cp_fm_write_unformatted, & ! writes a full matrix to an open unit
74 cp_fm_write_formatted, & ! writes a full matrix to an open unit
75 cp_fm_read_unformatted, & ! reads a full matrix from an open unit
76 cp_fm_setup, & ! allows to set flags for fms
79 cp_fm_to_fm_submat_general ! copy matrix across different contexts
80
81 PUBLIC :: cp_fm_pilaenv
82
83 INTERFACE cp_fm_to_fm
84 MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
85 cp_fm_to_fm_columns ! just a number of columns
86 END INTERFACE
87
88 INTERFACE cp_fm_release
89 MODULE PROCEDURE cp_fm_release_aa0, &
90 cp_fm_release_aa1, &
91 cp_fm_release_aa2, &
92 cp_fm_release_aa3, &
93 cp_fm_release_ap1, &
94 cp_fm_release_ap2, &
95 cp_fm_release_pa1, &
96 cp_fm_release_pa2, &
97 cp_fm_release_pa3, &
98 cp_fm_release_pp1, &
99 cp_fm_release_pp2
100 END INTERFACE
101
102! **************************************************************************************************
103!> \brief represent a full matrix
104!> \param name the name of the matrix, used for printing
105!> \param matrix_struct structure of this matrix
106!> \param local_data array with the data of the matrix (its contents
107!> depend on the matrix type used: in parallel runs it will be
108!> in scalapack format, in sequential, it will simply contain
109!> the matrix)
110!> \par History
111!> 08.2002 created [fawzi]
112!> \author fawzi
113! **************************************************************************************************
115! PRIVATE
116 CHARACTER(LEN=60) :: name = ""
117 TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
118 REAL(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
119 END TYPE cp_fm_type
120
121! **************************************************************************************************
122!> \brief just to build arrays of pointers to matrices
123!> \param matrix the pointer to the matrix
124!> \par History
125!> 08.2002 created [fawzi]
126!> \author fawzi
127! **************************************************************************************************
129 TYPE(cp_fm_type), POINTER :: matrix => null()
130 END TYPE cp_fm_p_type
131
132! **************************************************************************************************
133!> \brief Stores the state of a copy between cp_fm_start_copy_general
134!> and cp_fm_finish_copy_general
135!> \par History
136!> Jan 2017 [Mark T]
137! **************************************************************************************************
139 INTEGER :: send_size = -1
140 INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
141 TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
142 INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
143 INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
144 INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
145 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
146 END TYPE copy_info_type
147
148CONTAINS
149
150! **************************************************************************************************
151!> \brief creates a new full matrix with the given structure
152!> \param matrix the matrix to be created
153!> \param matrix_struct the structure of matrix
154!> \param name ...
155!> \param nrow ...
156!> \param ncol ...
157!> \param set_zero ...
158!> \par History
159!> 08.2002 created [fawzi]
160!> \author Fawzi Mohamed
161!> \note
162!> preferred allocation routine
163! **************************************************************************************************
164 SUBROUTINE cp_fm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
165 TYPE(cp_fm_type), INTENT(OUT) :: matrix
166 TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
167 CHARACTER(LEN=*), INTENT(in), OPTIONAL :: name
168 INTEGER, INTENT(IN), OPTIONAL :: nrow, ncol
169 LOGICAL, INTENT(in), OPTIONAL :: set_zero
170
171 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_create'
172
173 INTEGER :: handle, ncol_global, ncol_local, &
174 nrow_global, nrow_local
175 TYPE(cp_blacs_env_type), POINTER :: context
176 TYPE(cp_fm_struct_type), POINTER :: fm_struct
177
178 CALL timeset(routinen, handle)
179
180 IF (PRESENT(nrow) .OR. PRESENT(ncol)) THEN
181 CALL cp_fm_struct_get(matrix_struct, nrow_global=nrow_global, ncol_global=ncol_global)
182 IF (PRESENT(nrow)) nrow_global = nrow
183 IF (PRESENT(ncol)) ncol_global = ncol
184 CALL cp_fm_struct_create(fm_struct, template_fmstruct=matrix_struct, &
185 nrow_global=nrow_global, ncol_global=ncol_global)
186
187 context => fm_struct%context
188 matrix%matrix_struct => fm_struct
189 CALL cp_fm_struct_retain(matrix%matrix_struct)
190
191 nrow_local = fm_struct%local_leading_dimension
192 ncol_local = max(1, fm_struct%ncol_locals(context%mepos(2)))
193
194 CALL cp_fm_struct_release(fm_struct)
195 ELSE
196
197 context => matrix_struct%context
198 matrix%matrix_struct => matrix_struct
199 CALL cp_fm_struct_retain(matrix%matrix_struct)
200
201 ! OK, we allocate here at least a 1 x 1 matrix
202 ! this must (and is) compatible with the descinit call
203 ! in cp_fm_struct
204 nrow_local = matrix_struct%local_leading_dimension
205 ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
206 END IF
207
208 NULLIFY (matrix%local_data)
209
210 ALLOCATE (matrix%local_data(nrow_local, ncol_local))
211
212 IF (PRESENT(set_zero)) THEN
213 IF (set_zero) THEN
214 matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
215 END IF
216 END IF
217
218 IF (PRESENT(name)) THEN
219 matrix%name = name
220 ELSE
221 matrix%name = 'full matrix'
222 END IF
223
224 CALL timestop(handle)
225
226 END SUBROUTINE cp_fm_create
227
228! **************************************************************************************************
229!> \brief releases a full matrix
230!> \param matrix the matrix to release
231!> \par History
232!> 08.2002 created [fawzi]
233!> \author Fawzi Mohamed
234! **************************************************************************************************
235 SUBROUTINE cp_fm_release_aa0(matrix)
236 TYPE(cp_fm_type), INTENT(INOUT) :: matrix
237
238 IF (ASSOCIATED(matrix%local_data)) THEN
239 DEALLOCATE (matrix%local_data)
240 NULLIFY (matrix%local_data)
241 END IF
242 matrix%name = ""
243 CALL cp_fm_struct_release(matrix%matrix_struct)
244
245 END SUBROUTINE cp_fm_release_aa0
246
247! **************************************************************************************************
248!> \brief ...
249!> \param matrices ...
250! **************************************************************************************************
251 SUBROUTINE cp_fm_release_aa1(matrices)
252 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
253
254 INTEGER :: i
255
256 IF (ALLOCATED(matrices)) THEN
257 DO i = 1, SIZE(matrices)
258 CALL cp_fm_release(matrices(i))
259 END DO
260 DEALLOCATE (matrices)
261 END IF
262 END SUBROUTINE cp_fm_release_aa1
263
264! **************************************************************************************************
265!> \brief ...
266!> \param matrices ...
267! **************************************************************************************************
268 SUBROUTINE cp_fm_release_aa2(matrices)
269 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
270
271 INTEGER :: i, j
272
273 IF (ALLOCATED(matrices)) THEN
274 DO i = 1, SIZE(matrices, 1)
275 DO j = 1, SIZE(matrices, 2)
276 CALL cp_fm_release(matrices(i, j))
277 END DO
278 END DO
279 DEALLOCATE (matrices)
280 END IF
281 END SUBROUTINE cp_fm_release_aa2
282
283! **************************************************************************************************
284!> \brief ...
285!> \param matrices ...
286! **************************************************************************************************
287 SUBROUTINE cp_fm_release_aa3(matrices)
288 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
289
290 INTEGER :: i, j, k
291
292 IF (ALLOCATED(matrices)) THEN
293 DO i = 1, SIZE(matrices, 1)
294 DO j = 1, SIZE(matrices, 2)
295 DO k = 1, SIZE(matrices, 3)
296 CALL cp_fm_release(matrices(i, j, k))
297 END DO
298 END DO
299 END DO
300 DEALLOCATE (matrices)
301 END IF
302 END SUBROUTINE cp_fm_release_aa3
303
304! **************************************************************************************************
305!> \brief ...
306!> \param matrices ...
307! **************************************************************************************************
308 SUBROUTINE cp_fm_release_pa1(matrices)
309 TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
310
311 INTEGER :: i
312
313 IF (ASSOCIATED(matrices)) THEN
314 DO i = 1, SIZE(matrices)
315 CALL cp_fm_release(matrices(i))
316 END DO
317 DEALLOCATE (matrices)
318 NULLIFY (matrices)
319 END IF
320 END SUBROUTINE cp_fm_release_pa1
321
322! **************************************************************************************************
323!> \brief ...
324!> \param matrices ...
325! **************************************************************************************************
326 SUBROUTINE cp_fm_release_pa2(matrices)
327 TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
328
329 INTEGER :: i, j
330
331 IF (ASSOCIATED(matrices)) THEN
332 DO i = 1, SIZE(matrices, 1)
333 DO j = 1, SIZE(matrices, 2)
334 CALL cp_fm_release(matrices(i, j))
335 END DO
336 END DO
337 DEALLOCATE (matrices)
338 NULLIFY (matrices)
339 END IF
340 END SUBROUTINE cp_fm_release_pa2
341
342! **************************************************************************************************
343!> \brief ...
344!> \param matrices ...
345! **************************************************************************************************
346 SUBROUTINE cp_fm_release_pa3(matrices)
347 TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
348
349 INTEGER :: i, j, k
350
351 IF (ASSOCIATED(matrices)) THEN
352 DO i = 1, SIZE(matrices, 1)
353 DO j = 1, SIZE(matrices, 2)
354 DO k = 1, SIZE(matrices, 3)
355 CALL cp_fm_release(matrices(i, j, k))
356 END DO
357 END DO
358 END DO
359 DEALLOCATE (matrices)
360 NULLIFY (matrices)
361 END IF
362 END SUBROUTINE cp_fm_release_pa3
363
364! **************************************************************************************************
365!> \brief ...
366!> \param matrices ...
367! **************************************************************************************************
368 SUBROUTINE cp_fm_release_ap1(matrices)
369 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
370
371 INTEGER :: i
372
373 IF (ALLOCATED(matrices)) THEN
374 DO i = 1, SIZE(matrices)
375 CALL cp_fm_release(matrices(i)%matrix)
376 DEALLOCATE (matrices(i)%matrix)
377 END DO
378 DEALLOCATE (matrices)
379 END IF
380 END SUBROUTINE cp_fm_release_ap1
381
382! **************************************************************************************************
383!> \brief ...
384!> \param matrices ...
385! **************************************************************************************************
386 SUBROUTINE cp_fm_release_ap2(matrices)
387 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
388
389 INTEGER :: i, j
390
391 IF (ALLOCATED(matrices)) THEN
392 DO i = 1, SIZE(matrices, 1)
393 DO j = 1, SIZE(matrices, 2)
394 CALL cp_fm_release(matrices(i, j)%matrix)
395 DEALLOCATE (matrices(i, j)%matrix)
396 END DO
397 END DO
398 DEALLOCATE (matrices)
399 END IF
400 END SUBROUTINE cp_fm_release_ap2
401
402! **************************************************************************************************
403!> \brief ...
404!> \param matrices ...
405! **************************************************************************************************
406 SUBROUTINE cp_fm_release_pp1(matrices)
407 TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
408
409 INTEGER :: i
410
411 IF (ASSOCIATED(matrices)) THEN
412 DO i = 1, SIZE(matrices)
413 CALL cp_fm_release(matrices(i)%matrix)
414 DEALLOCATE (matrices(i)%matrix)
415 END DO
416 DEALLOCATE (matrices)
417 NULLIFY (matrices)
418 END IF
419 END SUBROUTINE cp_fm_release_pp1
420
421! **************************************************************************************************
422!> \brief ...
423!> \param matrices ...
424! **************************************************************************************************
425 SUBROUTINE cp_fm_release_pp2(matrices)
426 TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
427
428 INTEGER :: i, j
429
430 IF (ASSOCIATED(matrices)) THEN
431 DO i = 1, SIZE(matrices, 1)
432 DO j = 1, SIZE(matrices, 2)
433 CALL cp_fm_release(matrices(i, j)%matrix)
434 DEALLOCATE (matrices(i, j)%matrix)
435 END DO
436 END DO
437 DEALLOCATE (matrices)
438 NULLIFY (matrices)
439 END IF
440 END SUBROUTINE cp_fm_release_pp2
441
442! **************************************************************************************************
443!> \brief fills a matrix with random numbers
444!> \param matrix : to be initialized
445!> \param ncol : numbers of cols to fill
446!> \param start_col : starting at coll number
447!> \author Joost VandeVondele
448!> \note
449!> the value of a_ij is independent of the number of cpus
450! **************************************************************************************************
451 SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
452 TYPE(cp_fm_type), INTENT(IN) :: matrix
453 INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
454
455 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_init_random'
456
457 INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
458 ncol_local, nrow_global, nrow_local
459 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
460 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: buff
461 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
462 POINTER :: local_data
463 REAL(kind=dp), DIMENSION(3, 2), SAVE :: &
464 seed = reshape([1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp], [3, 2])
465 TYPE(rng_stream_type) :: rng
466
467 CALL timeset(routinen, handle)
468
469 ! guarantee same seed over all tasks
470 CALL matrix%matrix_struct%para_env%bcast(seed, 0)
471
472 rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=uniform, &
473 extended_precision=.true., seed=seed)
474
475 CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
476 nrow_local=nrow_local, ncol_local=ncol_local, &
477 local_data=local_data, &
478 row_indices=row_indices, col_indices=col_indices)
479
480 my_start_col = 1
481 IF (PRESENT(start_col)) my_start_col = start_col
482 my_ncol = matrix%matrix_struct%ncol_global
483 IF (PRESENT(ncol)) my_ncol = ncol
484
485 IF (ncol_global < (my_start_col + my_ncol - 1)) &
486 cpabort("ncol_global>=(my_start_col+my_ncol-1)")
487
488 ALLOCATE (buff(nrow_global))
489
490 ! each global row has its own substream, in order to reach the stream for the local col,
491 ! we just reset to the next substream
492 ! following this, we fill the full buff with random numbers, and pick those we need
493 icol_global = 0
494 DO icol_local = 1, ncol_local
495 cpassert(col_indices(icol_local) > icol_global)
496 DO
497 CALL rng%reset_to_next_substream()
498 icol_global = icol_global + 1
499 IF (icol_global == col_indices(icol_local)) EXIT
500 END DO
501 CALL rng%fill(buff)
502 DO irow_local = 1, nrow_local
503 local_data(irow_local, icol_local) = buff(row_indices(irow_local))
504 END DO
505 END DO
506
507 DEALLOCATE (buff)
508
509 ! store seed before deletion (unclear if this is the proper seed)
510
511 ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
512 ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
513 ! stream of random numbers. While this seems odd and contrary to the original design,
514 ! it was probably introduced to improve reproducibility.
515 ! See also https://github.com/cp2k/cp2k/pull/506
516 CALL rng%get(ig=seed)
517
518 CALL timestop(handle)
519
520 END SUBROUTINE cp_fm_init_random
521
522! **************************************************************************************************
523!> \brief set all elements of a matrix to the same value,
524!> and optionally the diagonal to a different one
525!> \param matrix input matrix
526!> \param alpha scalar used to set all elements of the matrix
527!> \param beta scalar used to set diagonal of the matrix
528!> \note
529!> can be used to zero a matrix
530!> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
531! **************************************************************************************************
532 SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
533
534 TYPE(cp_fm_type), INTENT(IN) :: matrix
535 REAL(kind=dp), INTENT(IN) :: alpha
536 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
537
538 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_set_all'
539
540 INTEGER :: handle, i, n
541
542 CALL timeset(routinen, handle)
543
544 matrix%local_data(:, :) = alpha
545
546 IF (PRESENT(beta)) THEN
547 n = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
548 DO i = 1, n
549 CALL cp_fm_set_element(matrix, i, i, beta)
550 END DO
551 END IF
552
553 CALL timestop(handle)
554
555 END SUBROUTINE cp_fm_set_all
556
557! **************************************************************************************************
558!> \brief returns the diagonal elements of a fm
559!> \param matrix ...
560!> \param diag ...
561! **************************************************************************************************
562 SUBROUTINE cp_fm_get_diag(matrix, diag)
563
564 ! arguments
565 TYPE(cp_fm_type), INTENT(IN) :: matrix
566 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: diag
567
568 ! locals
569 INTEGER :: i, nrow_global
570
571#if defined(__parallel)
572 INTEGER, DIMENSION(9) :: desca
573 TYPE(cp_blacs_env_type), POINTER :: context
574 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
575 nprow
576 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
577#endif
578
579 CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
580
581#if defined(__parallel)
582 diag = 0.0_dp
583 context => matrix%matrix_struct%context
584 myprow = context%mepos(1)
585 mypcol = context%mepos(2)
586 nprow = context%num_pe(1)
587 npcol = context%num_pe(2)
588
589 a => matrix%local_data
590 desca(:) = matrix%matrix_struct%descriptor(:)
591
592 DO i = 1, nrow_global
593 CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
594 irow_local, icol_local, iprow, ipcol)
595 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
596 diag(i) = a(irow_local, icol_local)
597 END IF
598 END DO
599#else
600 DO i = 1, nrow_global
601 diag(i) = matrix%local_data(i, i)
602 END DO
603#endif
604 CALL matrix%matrix_struct%para_env%sum(diag)
605
606 END SUBROUTINE cp_fm_get_diag
607
608! **************************************************************************************************
609!> \brief returns an element of a fm
610!> this value is valid on every cpu
611!> using this call is expensive
612!> \param matrix the matrix to read
613!> \param irow_global the row
614!> \param icol_global the col
615!> \param alpha the value of matrix(irow_global, icol_global)
616!> \param local true if the element is on this cpu, false otherwise
617!> \note
618!> - modified semantics. now this function always returns the value
619!> previously the value was zero on cpus that didn't own the relevant
620!> part of the matrix (Joost VandeVondele, May 2003)
621!> - usage of the function should be avoided, as it is likely to rather slow
622!> using row_indices/col_indices/local_data + some smart scheme normally
623!> yields a real parallel code
624! **************************************************************************************************
625 SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
626
627 ! arguments
628 TYPE(cp_fm_type), INTENT(IN) :: matrix
629 REAL(kind=dp), INTENT(OUT) :: alpha
630 INTEGER, INTENT(IN) :: icol_global, &
631 irow_global
632 LOGICAL, INTENT(OUT), OPTIONAL :: local
633
634 ! locals
635#if defined(__parallel)
636 INTEGER, DIMENSION(9) :: desca
637 TYPE(cp_blacs_env_type), POINTER :: context
638 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
639 nprow
640 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
641#endif
642
643#if defined(__parallel)
644 context => matrix%matrix_struct%context
645 myprow = context%mepos(1)
646 mypcol = context%mepos(2)
647 nprow = context%num_pe(1)
648 npcol = context%num_pe(2)
649
650 a => matrix%local_data
651 desca(:) = matrix%matrix_struct%descriptor(:)
652
653 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
654 irow_local, icol_local, iprow, ipcol)
655
656 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
657 alpha = a(irow_local, icol_local)
658 CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
659 IF (PRESENT(local)) local = .true.
660 ELSE
661 CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
662 IF (PRESENT(local)) local = .false.
663 END IF
664
665#else
666 IF (PRESENT(local)) local = .true.
667 alpha = matrix%local_data(irow_global, icol_global)
668#endif
669
670 END SUBROUTINE cp_fm_get_element
671
672! **************************************************************************************************
673!> \brief sets an element of a matrix
674!> \param matrix ...
675!> \param irow_global ...
676!> \param icol_global ...
677!> \param alpha ...
678!> \note
679!> we expect all cpus to have the same arguments in the call to this function
680!> (otherwise one should use local_data tricks)
681! **************************************************************************************************
682 SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
683 TYPE(cp_fm_type), INTENT(IN) :: matrix
684 INTEGER, INTENT(IN) :: irow_global, icol_global
685 REAL(kind=dp), INTENT(IN) :: alpha
686
687 INTEGER :: mypcol, myprow, npcol, nprow
688 TYPE(cp_blacs_env_type), POINTER :: context
689#if defined(__parallel)
690 INTEGER :: icol_local, ipcol, iprow, &
691 irow_local
692 INTEGER, DIMENSION(9) :: desca
693 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
694#endif
695
696 context => matrix%matrix_struct%context
697 myprow = context%mepos(1)
698 mypcol = context%mepos(2)
699 nprow = context%num_pe(1)
700 npcol = context%num_pe(2)
701
702#if defined(__parallel)
703
704 a => matrix%local_data
705
706 desca(:) = matrix%matrix_struct%descriptor(:)
707
708 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
709 irow_local, icol_local, iprow, ipcol)
710
711 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
712 a(irow_local, icol_local) = alpha
713 END IF
714
715#else
716
717 matrix%local_data(irow_global, icol_global) = alpha
718
719#endif
720 END SUBROUTINE cp_fm_set_element
721
722! **************************************************************************************************
723!> \brief sets a submatrix of a full matrix
724!> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
725!> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
726!> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
727!> \param fm the full to change
728!> \param new_values a replicated full matrix with the new values
729!> \param start_row the starting row of b_matrix (defaults to 1)
730!> \param start_col the starting col of b_matrix (defaults to 1)
731!> \param n_rows the number of row to change in b (defaults to
732!> size(op(new_values),1))
733!> \param n_cols the number of columns to change in b (defaults to
734!> size(op(new_values),2))
735!> \param alpha rescaling factor for the new values (defaults to 1.0)
736!> \param beta rescaling factor for the old values (defaults to 0.0)
737!> \param transpose if new_values should be transposed: if true
738!> op(new_values)=new_values^T, else op(new_values)=new_values
739!> (defaults to false)
740!> \par History
741!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
742!> \author Fawzi Mohamed
743!> \note
744!> optimized for full column updates and alpha=1.0, beta=0.0
745!> the new_values need to be valid on all cpus
746! **************************************************************************************************
747 SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
748 start_col, n_rows, n_cols, alpha, beta, transpose)
749 TYPE(cp_fm_type), INTENT(IN) :: fm
750 REAL(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
751 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
752 REAL(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
753 LOGICAL, INTENT(in), OPTIONAL :: transpose
754
755 INTEGER :: i, i0, j, j0, ncol, ncol_block, &
756 ncol_global, ncol_local, nrow, &
757 nrow_block, nrow_global, nrow_local, &
758 this_col, this_row
759 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
760 LOGICAL :: tr_a
761 REAL(kind=dp) :: al, be
762 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
763
764 al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .false.
765
766 IF (PRESENT(alpha)) al = alpha
767 IF (PRESENT(beta)) be = beta
768 IF (PRESENT(start_row)) i0 = start_row
769 IF (PRESENT(start_col)) j0 = start_col
770 IF (PRESENT(transpose)) tr_a = transpose
771 IF (tr_a) THEN
772 nrow = SIZE(new_values, 2)
773 ncol = SIZE(new_values, 1)
774 ELSE
775 nrow = SIZE(new_values, 1)
776 ncol = SIZE(new_values, 2)
777 END IF
778 IF (PRESENT(n_rows)) nrow = n_rows
779 IF (PRESENT(n_cols)) ncol = n_cols
780
781 full_block => fm%local_data
782
783 CALL cp_fm_get_info(matrix=fm, &
784 nrow_global=nrow_global, ncol_global=ncol_global, &
785 nrow_block=nrow_block, ncol_block=ncol_block, &
786 nrow_local=nrow_local, ncol_local=ncol_local, &
787 row_indices=row_indices, col_indices=col_indices)
788
789 IF (al == 1.0 .AND. be == 0.0) THEN
790 DO j = 1, ncol_local
791 this_col = col_indices(j) - j0 + 1
792 IF (this_col >= 1 .AND. this_col <= ncol) THEN
793 IF (tr_a) THEN
794 IF (i0 == 1 .AND. nrow_global == nrow) THEN
795 DO i = 1, nrow_local
796 full_block(i, j) = new_values(this_col, row_indices(i))
797 END DO
798 ELSE
799 DO i = 1, nrow_local
800 this_row = row_indices(i) - i0 + 1
801 IF (this_row >= 1 .AND. this_row <= nrow) THEN
802 full_block(i, j) = new_values(this_col, this_row)
803 END IF
804 END DO
805 END IF
806 ELSE
807 IF (i0 == 1 .AND. nrow_global == nrow) THEN
808 DO i = 1, nrow_local
809 full_block(i, j) = new_values(row_indices(i), this_col)
810 END DO
811 ELSE
812 DO i = 1, nrow_local
813 this_row = row_indices(i) - i0 + 1
814 IF (this_row >= 1 .AND. this_row <= nrow) THEN
815 full_block(i, j) = new_values(this_row, this_col)
816 END IF
817 END DO
818 END IF
819 END IF
820 END IF
821 END DO
822 ELSE
823 DO j = 1, ncol_local
824 this_col = col_indices(j) - j0 + 1
825 IF (this_col >= 1 .AND. this_col <= ncol) THEN
826 IF (tr_a) THEN
827 DO i = 1, nrow_local
828 this_row = row_indices(i) - i0 + 1
829 IF (this_row >= 1 .AND. this_row <= nrow) THEN
830 full_block(i, j) = al*new_values(this_col, this_row) + &
831 be*full_block(i, j)
832 END IF
833 END DO
834 ELSE
835 DO i = 1, nrow_local
836 this_row = row_indices(i) - i0 + 1
837 IF (this_row >= 1 .AND. this_row <= nrow) THEN
838 full_block(i, j) = al*new_values(this_row, this_col) + &
839 be*full_block(i, j)
840 END IF
841 END DO
842 END IF
843 END IF
844 END DO
845 END IF
846
847 END SUBROUTINE cp_fm_set_submatrix
848
849! **************************************************************************************************
850!> \brief sets a submatrix of a full matrix to a given value
851!> fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = value
852!> \param fm the full to change
853!> \param new_value ...
854!> \param start_row the starting row of matrix
855!> \param start_col the starting col of matrix
856!> \param n_rows the number of rows to change
857!> \param n_cols the number of columns to change
858!> \par History
859!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
860!> 12.2025 created from cp_fm_set_submatrix
861!> \author JGH
862! **************************************************************************************************
863 SUBROUTINE cp_fm_set_all_submatrix(fm, new_value, start_row, start_col, n_rows, n_cols)
864 TYPE(cp_fm_type), INTENT(IN) :: fm
865 REAL(kind=dp), INTENT(in) :: new_value
866 INTEGER, INTENT(in) :: start_row, start_col, n_rows, n_cols
867
868 INTEGER :: i, i0, j, j0, ncol_global, ncol_local, &
869 nrow_global, nrow_local, this_col, &
870 this_row
871 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
872 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
873
874 full_block => fm%local_data
875
876 CALL cp_fm_get_info(matrix=fm, &
877 nrow_global=nrow_global, ncol_global=ncol_global, &
878 nrow_local=nrow_local, ncol_local=ncol_local, &
879 row_indices=row_indices, col_indices=col_indices)
880
881 i0 = start_row
882 j0 = start_col
883 DO j = 1, ncol_local
884 this_col = col_indices(j) - j0 + 1
885 IF (this_col >= 1 .AND. this_col <= n_cols) THEN
886 DO i = 1, nrow_local
887 this_row = row_indices(i) - i0 + 1
888 IF (this_row >= 1 .AND. this_row <= n_rows) THEN
889 full_block(i, j) = new_value
890 END IF
891 END DO
892 END IF
893 END DO
894
895 END SUBROUTINE cp_fm_set_all_submatrix
896
897! **************************************************************************************************
898!> \brief gets a submatrix of a full matrix
899!> op(target_m)(1:n_rows,1:n_cols)
900!> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
901!> target_m is replicated on all cpus
902!> using this call is expensive
903!> \param fm the full you want to get the info from
904!> \param target_m a replicated full matrix that will contain the result
905!> \param start_row the starting row of b_matrix (defaults to 1)
906!> \param start_col the starting col of b_matrix (defaults to 1)
907!> \param n_rows the number of row to change in b (defaults to
908!> size(op(new_values),1))
909!> \param n_cols the number of columns to change in b (defaults to
910!> size(op(new_values),2))
911!> \param transpose if target_m should be transposed: if true
912!> op(target_m)=target_m^T, else op(target_m)=target_m
913!> (defaults to false)
914!> \par History
915!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
916!> \author Fawzi Mohamed
917!> \note
918!> optimized for full column updates. Zeros out a little too much
919!> of target_m
920!> the target_m is replicated and valid on all cpus
921! **************************************************************************************************
922 SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
923 start_col, n_rows, n_cols, transpose)
924 TYPE(cp_fm_type), INTENT(IN) :: fm
925 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
926 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
927 LOGICAL, INTENT(in), OPTIONAL :: transpose
928
929 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_get_submatrix'
930
931 INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
932 ncol_local, nrow, nrow_global, &
933 nrow_local, this_col, this_row
934 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
935 LOGICAL :: tr_a
936 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
937 TYPE(mp_para_env_type), POINTER :: para_env
938
939 CALL timeset(routinen, handle)
940
941 i0 = 1; j0 = 1; tr_a = .false.
942
943 IF (PRESENT(start_row)) i0 = start_row
944 IF (PRESENT(start_col)) j0 = start_col
945 IF (PRESENT(transpose)) tr_a = transpose
946 IF (tr_a) THEN
947 nrow = SIZE(target_m, 2)
948 ncol = SIZE(target_m, 1)
949 ELSE
950 nrow = SIZE(target_m, 1)
951 ncol = SIZE(target_m, 2)
952 END IF
953 IF (PRESENT(n_rows)) nrow = n_rows
954 IF (PRESENT(n_cols)) ncol = n_cols
955
956 para_env => fm%matrix_struct%para_env
957
958 full_block => fm%local_data
959#if defined(__parallel)
960 ! zero-out whole target_m
961 IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
962 CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
963 END IF
964#endif
965
966 CALL cp_fm_get_info(matrix=fm, &
967 nrow_global=nrow_global, ncol_global=ncol_global, &
968 nrow_local=nrow_local, ncol_local=ncol_local, &
969 row_indices=row_indices, col_indices=col_indices)
970
971 DO j = 1, ncol_local
972 this_col = col_indices(j) - j0 + 1
973 IF (this_col >= 1 .AND. this_col <= ncol) THEN
974 IF (tr_a) THEN
975 IF (i0 == 1 .AND. nrow_global == nrow) THEN
976 DO i = 1, nrow_local
977 target_m(this_col, row_indices(i)) = full_block(i, j)
978 END DO
979 ELSE
980 DO i = 1, nrow_local
981 this_row = row_indices(i) - i0 + 1
982 IF (this_row >= 1 .AND. this_row <= nrow) THEN
983 target_m(this_col, this_row) = full_block(i, j)
984 END IF
985 END DO
986 END IF
987 ELSE
988 IF (i0 == 1 .AND. nrow_global == nrow) THEN
989 DO i = 1, nrow_local
990 target_m(row_indices(i), this_col) = full_block(i, j)
991 END DO
992 ELSE
993 DO i = 1, nrow_local
994 this_row = row_indices(i) - i0 + 1
995 IF (this_row >= 1 .AND. this_row <= nrow) THEN
996 target_m(this_row, this_col) = full_block(i, j)
997 END IF
998 END DO
999 END IF
1000 END IF
1001 END IF
1002 END DO
1003
1004 CALL para_env%sum(target_m)
1005
1006 CALL timestop(handle)
1007
1008 END SUBROUTINE cp_fm_get_submatrix
1009
1010! **************************************************************************************************
1011!> \brief returns all kind of information about the full matrix
1012!> \param matrix ...
1013!> \param name ...
1014!> \param nrow_global ...
1015!> \param ncol_global ...
1016!> \param nrow_block ...
1017!> \param ncol_block ...
1018!> \param nrow_local ...
1019!> \param ncol_local ...
1020!> \param row_indices ...
1021!> \param col_indices ...
1022!> \param local_data ...
1023!> \param context ...
1024!> \param nrow_locals ...
1025!> \param ncol_locals ...
1026!> \param matrix_struct ...
1027!> \param para_env ...
1028!> \note
1029!> see also cp_fm_struct for explanation
1030!> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1031!> access to the local blacs block
1032! **************************************************************************************************
1033 SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1034 nrow_block, ncol_block, nrow_local, ncol_local, &
1035 row_indices, col_indices, local_data, context, &
1036 nrow_locals, ncol_locals, matrix_struct, para_env)
1037
1038 TYPE(cp_fm_type), INTENT(IN) :: matrix
1039 CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1040 INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1041 ncol_block, nrow_local, ncol_local
1042 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1043 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1044 OPTIONAL, POINTER :: local_data
1045 TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1046 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1047 TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1048 TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1049
1050 IF (PRESENT(name)) name = matrix%name
1051 IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1052 IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1053
1054 CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1055 ncol_local=ncol_local, nrow_global=nrow_global, &
1056 ncol_global=ncol_global, nrow_block=nrow_block, &
1057 ncol_block=ncol_block, row_indices=row_indices, &
1058 col_indices=col_indices, nrow_locals=nrow_locals, &
1059 ncol_locals=ncol_locals, context=context, para_env=para_env)
1060
1061 END SUBROUTINE cp_fm_get_info
1062
1063! **************************************************************************************************
1064!> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1065!> \param matrix a cp_fm_type instance
1066!> \param io_unit the I/O unit to use for writing
1067! **************************************************************************************************
1068 SUBROUTINE cp_fm_write_info(matrix, io_unit)
1069 TYPE(cp_fm_type), INTENT(IN) :: matrix
1070 INTEGER, INTENT(IN) :: io_unit
1071
1072 WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1073 CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1074 END SUBROUTINE cp_fm_write_info
1075
1076! **************************************************************************************************
1077!> \brief find the maximum absolute value of the matrix element
1078!> maxval(abs(matrix))
1079!> \param matrix ...
1080!> \param a_max ...
1081!> \param ir_max ...
1082!> \param ic_max ...
1083! **************************************************************************************************
1084 SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1085 TYPE(cp_fm_type), INTENT(IN) :: matrix
1086 REAL(kind=dp), INTENT(OUT) :: a_max
1087 INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1088
1089 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_maxabsval'
1090
1091 INTEGER :: handle, i, ic_max_local, ir_max_local, &
1092 j, mepos, ncol_local, nrow_local, &
1093 num_pe
1094 INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1095 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1096 REAL(dp) :: my_max
1097 REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1098 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1099
1100 CALL timeset(routinen, handle)
1101
1102 my_block => matrix%local_data
1103
1104 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1105 row_indices=row_indices, col_indices=col_indices)
1106
1107 a_max = maxval(abs(my_block(1:nrow_local, 1:ncol_local)))
1108
1109 IF (PRESENT(ir_max)) THEN
1110 num_pe = matrix%matrix_struct%para_env%num_pe
1111 mepos = matrix%matrix_struct%para_env%mepos
1112 ALLOCATE (ir_max_vec(0:num_pe - 1))
1113 ir_max_vec(0:num_pe - 1) = 0
1114 ALLOCATE (ic_max_vec(0:num_pe - 1))
1115 ic_max_vec(0:num_pe - 1) = 0
1116 ALLOCATE (a_max_vec(0:num_pe - 1))
1117 a_max_vec(0:num_pe - 1) = 0.0_dp
1118 my_max = 0.0_dp
1119
1120 IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1121 DO i = 1, ncol_local
1122 DO j = 1, nrow_local
1123 IF (abs(my_block(j, i)) > my_max) THEN
1124 my_max = my_block(j, i)
1125 ir_max_local = j
1126 ic_max_local = i
1127 END IF
1128 END DO
1129 END DO
1130
1131 a_max_vec(mepos) = my_max
1132 ir_max_vec(mepos) = row_indices(ir_max_local)
1133 ic_max_vec(mepos) = col_indices(ic_max_local)
1134
1135 END IF
1136
1137 CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1138 CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1139 CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1140
1141 my_max = 0.0_dp
1142 DO i = 0, num_pe - 1
1143 IF (a_max_vec(i) > my_max) THEN
1144 ir_max = ir_max_vec(i)
1145 ic_max = ic_max_vec(i)
1146 END IF
1147 END DO
1148
1149 DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1150 cpassert(ic_max > 0)
1151 cpassert(ir_max > 0)
1152
1153 END IF
1154
1155 CALL matrix%matrix_struct%para_env%max(a_max)
1156
1157 CALL timestop(handle)
1158
1159 END SUBROUTINE cp_fm_maxabsval
1160
1161! **************************************************************************************************
1162!> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1163!> = || A ||_infinity
1164!> \param matrix ...
1165!> \param a_max ...
1166!> \note
1167!> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| < || A ||_infinity
1168!> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1169!> http://mathworld.wolfram.com/MatrixNorm.html
1170!> (but the bound is not so tight in the general case)
1171! **************************************************************************************************
1172 SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1173 TYPE(cp_fm_type), INTENT(IN) :: matrix
1174 REAL(kind=dp), INTENT(OUT) :: a_max
1175
1176 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_maxabsrownorm'
1177
1178 INTEGER :: handle, i, j, ncol_local, nrow_global, &
1179 nrow_local
1180 INTEGER, DIMENSION(:), POINTER :: row_indices
1181 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: values
1182 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1183
1184 CALL timeset(routinen, handle)
1185
1186 my_block => matrix%local_data
1187
1188 CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1189 nrow_local=nrow_local, ncol_local=ncol_local)
1190
1191 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1192 ALLOCATE (values(nrow_global))
1193 values = 0.0_dp
1194 DO j = 1, ncol_local
1195 DO i = 1, nrow_local
1196 values(row_indices(i)) = values(row_indices(i)) + abs(my_block(i, j))
1197 END DO
1198 END DO
1199 CALL matrix%matrix_struct%para_env%sum(values)
1200 a_max = maxval(values)
1201 DEALLOCATE (values)
1202
1203 CALL timestop(handle)
1204 END SUBROUTINE cp_fm_maxabsrownorm
1205
1206! **************************************************************************************************
1207!> \brief find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
1208!> \param matrix ...
1209!> \param norm_array ...
1210! **************************************************************************************************
1211 SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1212 TYPE(cp_fm_type), INTENT(IN) :: matrix
1213 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1214
1215 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_vectorsnorm'
1216
1217 INTEGER :: handle, i, j, ncol_global, ncol_local, &
1218 nrow_local
1219 INTEGER, DIMENSION(:), POINTER :: col_indices
1220 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1221
1222 CALL timeset(routinen, handle)
1223
1224 my_block => matrix%local_data
1225
1226 CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1227 nrow_local=nrow_local, ncol_local=ncol_local)
1228
1229 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1230 norm_array = 0.0_dp
1231 DO j = 1, ncol_local
1232 DO i = 1, nrow_local
1233 norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1234 END DO
1235 END DO
1236 CALL matrix%matrix_struct%para_env%sum(norm_array)
1237 norm_array = sqrt(norm_array)
1238
1239 CALL timestop(handle)
1240 END SUBROUTINE cp_fm_vectorsnorm
1241
1242! **************************************************************************************************
1243!> \brief summing up all the elements along the matrix's i-th index
1244!> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1245!> or
1246!> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1247!> \param matrix an input matrix A
1248!> \param sum_array sums of elements in each column/row
1249!> \param dir ...
1250!> \note forked from cp_fm_vectorsnorm() to be used with
1251!> the maximum overlap method
1252!> added row variation
1253! **************************************************************************************************
1254 SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1255 TYPE(cp_fm_type), INTENT(IN) :: matrix
1256 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1257 CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1258
1259 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_vectorssum'
1260
1261 INTEGER :: handle, i, j, ncol_local, nrow_local
1262 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1263 LOGICAL :: docol
1264 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1265
1266 CALL timeset(routinen, handle)
1267
1268 IF (PRESENT(dir)) THEN
1269 IF (dir == 'c' .OR. dir == 'C') THEN
1270 docol = .true.
1271 ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1272 docol = .false.
1273 ELSE
1274 cpabort('Wrong argument DIR')
1275 END IF
1276 ELSE
1277 docol = .true.
1278 END IF
1279
1280 my_block => matrix%local_data
1281
1282 CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1283 nrow_local=nrow_local, ncol_local=ncol_local)
1284
1285 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1286 sum_array(:) = 0.0_dp
1287 IF (docol) THEN
1288 DO j = 1, ncol_local
1289 DO i = 1, nrow_local
1290 sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1291 END DO
1292 END DO
1293 ELSE
1294 DO j = 1, ncol_local
1295 DO i = 1, nrow_local
1296 sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1297 END DO
1298 END DO
1299 END IF
1300 CALL matrix%matrix_struct%para_env%sum(sum_array)
1301
1302 CALL timestop(handle)
1303 END SUBROUTINE cp_fm_vectorssum
1304
1305! **************************************************************************************************
1306!> \brief copy one identically sized matrix in the other
1307!> \param source ...
1308!> \param destination ...
1309!> \note
1310!> see also cp_fm_to_fm_columns
1311! **************************************************************************************************
1312 SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1313
1314 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1315
1316 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_matrix'
1317
1318 INTEGER :: handle, npcol, nprow
1319
1320 CALL timeset(routinen, handle)
1321
1322 nprow = source%matrix_struct%context%num_pe(1)
1323 npcol = source%matrix_struct%context%num_pe(2)
1324
1325 IF ((.NOT. cp2k_is_parallel) .OR. &
1326 cp_fm_struct_equivalent(source%matrix_struct, &
1327 destination%matrix_struct)) THEN
1328 IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1329 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1330 CALL cp_abort(__location__, &
1331 "Cannot copy full matrix <"//trim(source%name)// &
1332 "> to full matrix <"//trim(destination%name)// &
1333 ">. The local_data blocks have different sizes.")
1334 CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1335 source%local_data, 1, destination%local_data, 1)
1336 ELSE
1337 cpabort("Data structures of source and target full matrix are not equivalent")
1338 END IF
1339
1340 CALL timestop(handle)
1341
1342 END SUBROUTINE cp_fm_to_fm_matrix
1343
1344! **************************************************************************************************
1345!> \brief copy just a subset of columns of a fm to a fm
1346!> \param msource ...
1347!> \param mtarget ...
1348!> \param ncol ...
1349!> \param source_start ...
1350!> \param target_start ...
1351! **************************************************************************************************
1352 SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1353 target_start)
1354
1355 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1356 INTEGER, INTENT(IN) :: ncol
1357 INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1358
1359 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
1360
1361 INTEGER :: handle, n, ss, ts
1362 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1363#if defined(__parallel)
1364 INTEGER :: i
1365 INTEGER, DIMENSION(9) :: desca, descb
1366#endif
1367
1368 CALL timeset(routinen, handle)
1369
1370 ss = 1
1371 ts = 1
1372
1373 IF (PRESENT(source_start)) ss = source_start
1374 IF (PRESENT(target_start)) ts = target_start
1375
1376 n = msource%matrix_struct%nrow_global
1377
1378 a => msource%local_data
1379 b => mtarget%local_data
1380
1381#if defined(__parallel)
1382 desca(:) = msource%matrix_struct%descriptor(:)
1383 descb(:) = mtarget%matrix_struct%descriptor(:)
1384 DO i = 0, ncol - 1
1385 CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1386 END DO
1387#else
1388 IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
1389 CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1390 END IF
1391#endif
1392
1393 CALL timestop(handle)
1394
1395 END SUBROUTINE cp_fm_to_fm_columns
1396
1397! **************************************************************************************************
1398!> \brief copy just a triangular matrix
1399!> \param msource ...
1400!> \param mtarget ...
1401!> \param uplo ...
1402! **************************************************************************************************
1403 SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1404
1405 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1406 CHARACTER(LEN=1), OPTIONAL, INTENT(IN) :: uplo
1407
1408 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_triangular'
1409
1410 CHARACTER(LEN=1) :: myuplo
1411 INTEGER :: handle, ncol, nrow
1412 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1413#if defined(__parallel)
1414 INTEGER, DIMENSION(9) :: desca, descb
1415#endif
1416
1417 CALL timeset(routinen, handle)
1418
1419 myuplo = 'U'
1420 IF (PRESENT(uplo)) myuplo = uplo
1421
1422 nrow = msource%matrix_struct%nrow_global
1423 ncol = msource%matrix_struct%ncol_global
1424
1425 a => msource%local_data
1426 b => mtarget%local_data
1427
1428#if defined(__parallel)
1429 desca(:) = msource%matrix_struct%descriptor(:)
1430 descb(:) = mtarget%matrix_struct%descriptor(:)
1431 CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1432#else
1433 CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1434#endif
1435
1436 CALL timestop(handle)
1437
1438 END SUBROUTINE cp_fm_to_fm_triangular
1439
1440! **************************************************************************************************
1441!> \brief copy just a part ot the matrix
1442!> \param msource ...
1443!> \param mtarget ...
1444!> \param nrow ...
1445!> \param ncol ...
1446!> \param s_firstrow ...
1447!> \param s_firstcol ...
1448!> \param t_firstrow ...
1449!> \param t_firstcol ...
1450! **************************************************************************************************
1451
1452 SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1453
1454 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1455 INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1456 s_firstcol, t_firstrow, &
1457 t_firstcol
1458
1459 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat'
1460
1461 INTEGER :: handle, i, na, nb, ss, ts
1462 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1463#if defined(__parallel)
1464 INTEGER, DIMENSION(9) :: desca, descb
1465#endif
1466
1467 CALL timeset(routinen, handle)
1468
1469 a => msource%local_data
1470 b => mtarget%local_data
1471
1472 na = msource%matrix_struct%nrow_global
1473 nb = mtarget%matrix_struct%nrow_global
1474! nrow must be <= na and nb
1475 IF (nrow > na) &
1476 cpabort("cannot copy because nrow > number of rows of source matrix")
1477 IF (nrow > nb) &
1478 cpabort("cannot copy because nrow > number of rows of target matrix")
1479 na = msource%matrix_struct%ncol_global
1480 nb = mtarget%matrix_struct%ncol_global
1481! ncol must be <= na_col and nb_col
1482 IF (ncol > na) &
1483 cpabort("cannot copy because nrow > number of rows of source matrix")
1484 IF (ncol > nb) &
1485 cpabort("cannot copy because nrow > number of rows of target matrix")
1486
1487#if defined(__parallel)
1488 desca(:) = msource%matrix_struct%descriptor(:)
1489 descb(:) = mtarget%matrix_struct%descriptor(:)
1490 DO i = 0, ncol - 1
1491 ss = s_firstcol + i
1492 ts = t_firstcol + i
1493 CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1494 END DO
1495#else
1496 DO i = 0, ncol - 1
1497 ss = s_firstcol + i
1498 ts = t_firstcol + i
1499 CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1500 END DO
1501#endif
1502
1503 CALL timestop(handle)
1504 END SUBROUTINE cp_fm_to_fm_submat
1505
1506! **************************************************************************************************
1507!> \brief General copy of a fm matrix to another fm matrix.
1508!> Uses non-blocking MPI rather than ScaLAPACK.
1509!>
1510!> \param source input fm matrix
1511!> \param destination output fm matrix
1512!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1513!> of the input and output matrices
1514!> \par History
1515!> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1516! **************************************************************************************************
1517 SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1518 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1519 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1520
1521 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_copy_general'
1522
1523 INTEGER :: handle
1524 TYPE(copy_info_type) :: info
1525
1526 CALL timeset(routinen, handle)
1527
1528 CALL cp_fm_start_copy_general(source, destination, para_env, info)
1529 IF (ASSOCIATED(destination%matrix_struct)) THEN
1530 CALL cp_fm_finish_copy_general(destination, info)
1531 END IF
1532 IF (ASSOCIATED(source%matrix_struct)) THEN
1534 END IF
1535
1536 CALL timestop(handle)
1537 END SUBROUTINE cp_fm_copy_general
1538
1539! **************************************************************************************************
1540!> \brief Initiates the copy operation: get distribution data, post MPI isend and irecvs
1541!> \param source input fm matrix
1542!> \param destination output fm matrix
1543!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1544!> of the input and output matrices
1545!> \param info all of the data that will be needed to complete the copy operation
1546! **************************************************************************************************
1547 SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1548 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1549 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1550 TYPE(copy_info_type), INTENT(OUT) :: info
1551
1552 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_start_copy_general'
1553
1554 INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1555 ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1556 nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1557 recv_rank, recv_size, send_rank, send_size
1558 INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1559 recv_count, send_count, send_disp, &
1560 source2global, src_p, src_q
1561 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1562 INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1563 src_block, src_block_tmp, src_num_pe
1564 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1565 send_col_indices, send_row_indices
1566 TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1567 TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1568
1569 CALL timeset(routinen, handle)
1570
1571 IF (.NOT. cp2k_is_parallel) THEN
1572 ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1573 nrow_local_send = SIZE(source%local_data, 1)
1574 ncol_local_send = SIZE(source%local_data, 2)
1575 ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1576 k = 0
1577 DO j = 1, ncol_local_send
1578 DO i = 1, nrow_local_send
1579 k = k + 1
1580 info%send_buf(k) = source%local_data(i, j)
1581 END DO
1582 END DO
1583 ELSE
1584 NULLIFY (recv_dist, send_dist)
1585 NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1586
1587 ! The 'global' communicator contains both the source and destination decompositions
1588 global_size = para_env%num_pe
1589 global_rank = para_env%mepos
1590
1591 ! The source/send decomposition and destination/recv decompositions may only exist on
1592 ! on a subset of the processes involved in the communication
1593 ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1594 ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1595 IF (ASSOCIATED(destination%matrix_struct)) THEN
1596 recv_dist => destination%matrix_struct
1597 recv_rank = recv_dist%para_env%mepos
1598 ELSE
1599 recv_rank = mp_proc_null
1600 END IF
1601
1602 IF (ASSOCIATED(source%matrix_struct)) THEN
1603 send_dist => source%matrix_struct
1604 send_rank = send_dist%para_env%mepos
1605 ELSE
1606 send_rank = mp_proc_null
1607 END IF
1608
1609 ! Map the rank in the source/dest communicator to the global rank
1610 ALLOCATE (all_ranks(0:global_size - 1))
1611
1612 CALL para_env%allgather(send_rank, all_ranks)
1613 IF (ASSOCIATED(recv_dist)) THEN
1614 ALLOCATE (source2global(0:count(all_ranks /= mp_proc_null) - 1))
1615 DO i = 0, global_size - 1
1616 IF (all_ranks(i) /= mp_proc_null) THEN
1617 source2global(all_ranks(i)) = i
1618 END IF
1619 END DO
1620 END IF
1621
1622 CALL para_env%allgather(recv_rank, all_ranks)
1623 IF (ASSOCIATED(send_dist)) THEN
1624 ALLOCATE (dest2global(0:count(all_ranks /= mp_proc_null) - 1))
1625 DO i = 0, global_size - 1
1626 IF (all_ranks(i) /= mp_proc_null) THEN
1627 dest2global(all_ranks(i)) = i
1628 END IF
1629 END DO
1630 END IF
1631 DEALLOCATE (all_ranks)
1632
1633 ! Some data from the two decompositions will be needed by all processes in the global group :
1634 ! process grid shape, block size, and the BLACS-to-MPI mapping
1635
1636 ! The global root process will receive the data (from the root process in each decomposition)
1637 send_req(:) = mp_request_null
1638 IF (global_rank == 0) THEN
1639 recv_req(:) = mp_request_null
1640 CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1641 CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1642 CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1643 CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1644 END IF
1645
1646 IF (ASSOCIATED(send_dist)) THEN
1647 IF ((send_rank == 0)) THEN
1648 ! need to use separate buffers here in case this is actually global rank 0
1649 src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
1650 CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1651 CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1652 END IF
1653 END IF
1654
1655 IF (ASSOCIATED(recv_dist)) THEN
1656 IF ((recv_rank == 0)) THEN
1657 dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
1658 CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1659 CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1660 END IF
1661 END IF
1662
1663 IF (global_rank == 0) THEN
1664 CALL mp_waitall(recv_req(1:4))
1665 ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1666 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1667 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1668 )
1669 CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1670 CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1671 END IF
1672
1673 IF (ASSOCIATED(send_dist)) THEN
1674 IF ((send_rank == 0)) THEN
1675 CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1676 END IF
1677 END IF
1678
1679 IF (ASSOCIATED(recv_dist)) THEN
1680 IF ((recv_rank == 0)) THEN
1681 CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1682 END IF
1683 END IF
1684
1685 IF (global_rank == 0) THEN
1686 CALL mp_waitall(recv_req(5:6))
1687 END IF
1688
1689 ! Finally, broadcast the data to all processes in the global communicator
1690 CALL para_env%bcast(src_block, 0)
1691 CALL para_env%bcast(dest_block, 0)
1692 CALL para_env%bcast(src_num_pe, 0)
1693 CALL para_env%bcast(dest_num_pe, 0)
1694 info%src_num_pe(1:2) = src_num_pe(1:2)
1695 info%nblock_src(1:2) = src_block(1:2)
1696 IF (global_rank /= 0) THEN
1697 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1698 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1699 )
1700 END IF
1701 CALL para_env%bcast(info%src_blacs2mpi, 0)
1702 CALL para_env%bcast(dest_blacs2mpi, 0)
1703
1704 recv_size = dest_num_pe(1)*dest_num_pe(2)
1705 send_size = src_num_pe(1)*src_num_pe(2)
1706 info%send_size = send_size
1707 CALL mp_waitall(send_req(:))
1708
1709 ! Setup is now complete, we can start the actual communication here.
1710 ! The order implemented here is:
1711 ! DEST_1
1712 ! compute recv sizes
1713 ! call irecv
1714 ! SRC_1
1715 ! compute send sizes
1716 ! pack send buffers
1717 ! call isend
1718 ! DEST_2
1719 ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1720 ! SRC_2
1721 ! wait for the sends
1722
1723 ! DEST_1
1724 IF (ASSOCIATED(recv_dist)) THEN
1725 CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1726 col_indices=recv_col_indices &
1727 )
1728 info%recv_col_indices => recv_col_indices
1729 info%recv_row_indices => recv_row_indices
1730 nrow_block_src = src_block(1)
1731 ncol_block_src = src_block(2)
1732 ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1733
1734 ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1735 nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1736 ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1737 info%nlocal_recv(1) = nrow_local_recv
1738 info%nlocal_recv(2) = ncol_local_recv
1739 ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1740 ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1741 DO i = 1, nrow_local_recv
1742 ! For each local row we will receive, we look up its global row (in recv_row_indices),
1743 ! then work out which row block it comes from, and which process row that row block comes from.
1744 src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1745 END DO
1746 DO j = 1, ncol_local_recv
1747 ! Similarly for the columns
1748 src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1749 END DO
1750 ! src_p/q now contains the process row/column ID that will send data to that row/column
1751
1752 DO q = 0, src_num_pe(2) - 1
1753 ncols = count(src_q == q)
1754 DO p = 0, src_num_pe(1) - 1
1755 nrows = count(src_p == p)
1756 ! Use the send_dist here as we are looking up the processes where the data comes from
1757 recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1758 END DO
1759 END DO
1760 DEALLOCATE (src_p, src_q)
1761
1762 ! Use one long buffer (and displacements into that buffer)
1763 ! this prevents the need for a rectangular array where not all elements will be populated
1764 ALLOCATE (info%recv_buf(sum(recv_count(:))))
1765 info%recv_disp(0) = 0
1766 DO i = 1, send_size - 1
1767 info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1768 END DO
1769
1770 ! Issue receive calls on ranks which expect data
1771 DO k = 0, send_size - 1
1772 IF (recv_count(k) > 0) THEN
1773 CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1774 source2global(k), info%recv_request(k))
1775 END IF
1776 END DO
1777 DEALLOCATE (source2global)
1778 END IF ! ASSOCIATED(recv_dist)
1779
1780 ! SRC_1
1781 IF (ASSOCIATED(send_dist)) THEN
1782 CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1783 col_indices=send_col_indices &
1784 )
1785 nrow_block_dest = dest_block(1)
1786 ncol_block_dest = dest_block(2)
1787 ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1788
1789 ! Determine the send counts, allocate the send buffers
1790 nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1791 ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1792
1793 ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1794 ! i.e. number of rows,cols in the sending distribution
1795 ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1796
1797 DO i = 1, nrow_local_send
1798 ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1799 dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1800 END DO
1801 DO j = 1, ncol_local_send
1802 dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1803 END DO
1804 ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1805
1806 DO q = 0, dest_num_pe(2) - 1
1807 ncols = count(dest_q == q)
1808 DO p = 0, dest_num_pe(1) - 1
1809 nrows = count(dest_p == p)
1810 send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1811 END DO
1812 END DO
1813 DEALLOCATE (dest_p, dest_q)
1814
1815 ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1816 ALLOCATE (info%send_buf(sum(send_count(:))))
1817 send_disp(0) = 0
1818 DO k = 1, recv_size - 1
1819 send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1820 END DO
1821
1822 ! Loop over the smat, pack the send buffers
1823 send_count(:) = 0
1824 DO j = 1, ncol_local_send
1825 ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1826 dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1827 DO i = 1, nrow_local_send
1828 dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1829 mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1830 send_count(mpi_rank) = send_count(mpi_rank) + 1
1831 info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1832 END DO
1833 END DO
1834
1835 ! For each non-zero send_count, call mpi_isend
1836 DO k = 0, recv_size - 1
1837 IF (send_count(k) > 0) THEN
1838 CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1839 dest2global(k), info%send_request(k))
1840 END IF
1841 END DO
1842 DEALLOCATE (send_count, send_disp, dest2global)
1843 END IF ! ASSOCIATED(send_dist)
1844 DEALLOCATE (dest_blacs2mpi)
1845
1846 END IF !IF (.NOT. cp2k_is_parallel)
1847
1848 CALL timestop(handle)
1849
1850 END SUBROUTINE cp_fm_start_copy_general
1851
1852! **************************************************************************************************
1853!> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1854!> \param destination output fm matrix
1855!> \param info all of the data that will be needed to complete the copy operation
1856! **************************************************************************************************
1857 SUBROUTINE cp_fm_finish_copy_general(destination, info)
1858 TYPE(cp_fm_type), INTENT(IN) :: destination
1859 TYPE(copy_info_type), INTENT(INOUT) :: info
1860
1861 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_finish_copy_general'
1862
1863 INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1864 src_p_i, src_q_j
1865 INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1866 INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1867 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1868
1869 CALL timeset(routinen, handle)
1870
1871 IF (.NOT. cp2k_is_parallel) THEN
1872 ! Now unpack the data from the 'send buffer'
1873 k = 0
1874 DO j = 1, SIZE(destination%local_data, 2)
1875 DO i = 1, SIZE(destination%local_data, 1)
1876 k = k + 1
1877 destination%local_data(i, j) = info%send_buf(k)
1878 END DO
1879 END DO
1880 DEALLOCATE (info%send_buf)
1881 ELSE
1882 ! Set up local variables ...
1883 send_size = info%send_size
1884 nlocal_recv(1:2) = info%nlocal_recv(:)
1885 nblock_src(1:2) = info%nblock_src(:)
1886 src_num_pe(1:2) = info%src_num_pe(:)
1887 recv_col_indices => info%recv_col_indices
1888 recv_row_indices => info%recv_row_indices
1889
1890 ! ... use the local variables to do the work
1891 ! DEST_2
1892 CALL mp_waitall(info%recv_request(:))
1893 ALLOCATE (recv_count(0:send_size - 1))
1894 ! Loop over the rmat, filling it in with data from the recv buffers
1895 ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1896 recv_count(:) = 0
1897 DO j = 1, nlocal_recv(2)
1898 src_q_j = mod(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
1899 DO i = 1, nlocal_recv(1)
1900 src_p_i = mod(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
1901 mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
1902 recv_count(mpi_rank) = recv_count(mpi_rank) + 1
1903 destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
1904 END DO
1905 END DO
1906 DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
1907 ! Invalidate the stored state
1908 NULLIFY (info%recv_col_indices, &
1909 info%recv_row_indices)
1910
1911 END IF
1912
1913 CALL timestop(handle)
1914
1915 END SUBROUTINE cp_fm_finish_copy_general
1916
1917! **************************************************************************************************
1918!> \brief Completes the copy operation: wait for comms clean up MPI state
1919!> \param info all of the data that will be needed to complete the copy operation
1920! **************************************************************************************************
1922 TYPE(copy_info_type), INTENT(INOUT) :: info
1923
1924 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_cleanup_copy_general'
1925
1926 INTEGER :: handle
1927
1928 CALL timeset(routinen, handle)
1929
1930 IF (.NOT. cp2k_is_parallel) THEN
1931 ! Don't do anything - no MPI state for the serial case
1932 ELSE
1933 ! SRC_2
1934 ! If this process is also in the destination decomposition, this deallocate
1935 ! Was already done in cp_fm_finish_copy_general
1936 IF (ALLOCATED(info%src_blacs2mpi)) THEN
1937 DEALLOCATE (info%src_blacs2mpi)
1938 END IF
1939 CALL mp_waitall(info%send_request)
1940 DEALLOCATE (info%send_request, info%send_buf)
1941
1942 END IF
1943
1944 CALL timestop(handle)
1945
1946 END SUBROUTINE cp_fm_cleanup_copy_general
1947
1948! **************************************************************************************************
1949!> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
1950!> The two matrices can have different contexts.
1951!>
1952!> Summary of distribution routines for dense matrices
1953!> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
1954!>
1955!> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
1956!>
1957!> A process that is not a part of the context of A should set descA(2)
1958!> to -1, and similarly for B.
1959!>
1960!> \param source input fm matrix
1961!> \param destination output fm matrix
1962!> \param nrows number of rows of sub matrix to be copied
1963!> \param ncols number of cols of sub matrix to be copied
1964!> \param s_firstrow starting global row index of sub matrix in source
1965!> \param s_firstcol starting global col index of sub matrix in source
1966!> \param d_firstrow starting global row index of sub matrix in destination
1967!> \param d_firstcol starting global col index of sub matrix in destination
1968!> \param global_context process grid that covers all parts of either A or B.
1969! **************************************************************************************************
1970 SUBROUTINE cp_fm_to_fm_submat_general(source, &
1971 destination, &
1972 nrows, &
1973 ncols, &
1974 s_firstrow, &
1975 s_firstcol, &
1976 d_firstrow, &
1977 d_firstcol, &
1978 global_context)
1979
1980 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1981 INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
1982 d_firstrow, d_firstcol
1983
1984 CLASS(cp_blacs_type), INTENT(IN) :: global_context
1985
1986 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat_general'
1987
1988 LOGICAL :: debug
1989 INTEGER :: handle
1990#if defined(__parallel)
1991 INTEGER, DIMENSION(9) :: desca, descb
1992 REAL(kind=dp), DIMENSION(1, 1), TARGET :: dummy
1993 REAL(kind=dp), DIMENSION(:, :), POINTER :: smat, dmat
1994#endif
1995
1996 CALL timeset(routinen, handle)
1997
1998 debug = debug_this_module
1999
2000 IF (.NOT. cp2k_is_parallel) THEN
2001 CALL cp_fm_to_fm_submat(source, &
2002 destination, &
2003 nrows, &
2004 ncols, &
2005 s_firstrow, &
2006 s_firstcol, &
2007 d_firstrow, &
2008 d_firstcol)
2009 ELSE
2010#ifdef __parallel
2011 NULLIFY (smat, dmat)
2012 ! check whether source is available on this process
2013 IF (ASSOCIATED(source%matrix_struct)) THEN
2014 desca = source%matrix_struct%descriptor
2015 IF (nrows > source%matrix_struct%nrow_global) &
2016 cpabort("nrows is greater than nrow_global of source")
2017 IF (ncols > source%matrix_struct%ncol_global) &
2018 cpabort("ncols is greater than ncol_global of source")
2019 smat => source%local_data
2020 ELSE
2021 desca = -1
2022 smat => dummy
2023 END IF
2024 ! check destination is available on this process
2025 IF (ASSOCIATED(destination%matrix_struct)) THEN
2026 descb = destination%matrix_struct%descriptor
2027 IF (nrows > destination%matrix_struct%nrow_global) &
2028 cpabort("nrows is greater than nrow_global of destination")
2029 IF (ncols > destination%matrix_struct%ncol_global) &
2030 cpabort("ncols is greater than ncol_global of destination")
2031 dmat => destination%local_data
2032 ELSE
2033 descb = -1
2034 dmat => dummy
2035 END IF
2036 ! do copy
2037
2038 CALL pdgemr2d(nrows, &
2039 ncols, &
2040 smat, &
2041 s_firstrow, &
2042 s_firstcol, &
2043 desca, &
2044 dmat, &
2045 d_firstrow, &
2046 d_firstcol, &
2047 descb, &
2048 global_context%get_handle())
2049#else
2050 mark_used(global_context)
2051 cpabort("this subroutine only supports SCALAPACK")
2052#endif
2053 END IF
2054
2055 CALL timestop(handle)
2056
2057 END SUBROUTINE cp_fm_to_fm_submat_general
2058
2059! **************************************************************************************************
2060!> \brief ...
2061!> \param matrix ...
2062!> \param irow_global ...
2063!> \param icol_global ...
2064!> \param alpha ...
2065! **************************************************************************************************
2066 SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2067
2068 ! Add alpha to the matrix element specified by the global indices
2069 ! irow_global and icol_global
2070
2071 ! - Creation (05.05.06,MK)
2072
2073 TYPE(cp_fm_type), INTENT(IN) :: matrix
2074 INTEGER, INTENT(IN) :: irow_global, icol_global
2075 REAL(kind=dp), INTENT(IN) :: alpha
2076
2077 INTEGER :: mypcol, myprow, npcol, nprow
2078 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2079 TYPE(cp_blacs_env_type), POINTER :: context
2080#if defined(__parallel)
2081 INTEGER :: icol_local, ipcol, iprow, &
2082 irow_local
2083 INTEGER, DIMENSION(9) :: desca
2084#endif
2085
2086 context => matrix%matrix_struct%context
2087
2088 myprow = context%mepos(1)
2089 mypcol = context%mepos(2)
2090
2091 nprow = context%num_pe(1)
2092 npcol = context%num_pe(2)
2093
2094 a => matrix%local_data
2095
2096#if defined(__parallel)
2097
2098 desca(:) = matrix%matrix_struct%descriptor(:)
2099
2100 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2101 irow_local, icol_local, iprow, ipcol)
2102
2103 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2104 a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2105 END IF
2106
2107#else
2108
2109 a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2110
2111#endif
2112
2113 END SUBROUTINE cp_fm_add_to_element
2114
2115! **************************************************************************************************
2116!> \brief ...
2117!> \param fm ...
2118!> \param unit ...
2119! **************************************************************************************************
2120 SUBROUTINE cp_fm_write_unformatted(fm, unit)
2121 TYPE(cp_fm_type), INTENT(IN) :: fm
2122 INTEGER, INTENT(IN) :: unit
2123
2124 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_write_unformatted'
2125
2126 INTEGER :: handle, j, max_block, &
2127 ncol_global, nrow_global
2128 TYPE(mp_para_env_type), POINTER :: para_env
2129#if defined(__parallel)
2130 INTEGER :: i, i_block, icol_local, &
2131 in, info, ipcol, &
2132 iprow, irow_local, &
2133 mepos, &
2134 num_pe, rb, tag
2135 INTEGER, DIMENSION(9) :: desc
2136 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2137 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2138 TYPE(cp_blacs_type) :: ictxt_loc
2139 INTEGER, EXTERNAL :: numroc
2140#endif
2141
2142 CALL timeset(routinen, handle)
2143 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2144 para_env=para_env)
2145
2146#if defined(__parallel)
2147 num_pe = para_env%num_pe
2148 mepos = para_env%mepos
2149 rb = nrow_global
2150 tag = 0
2151 ! get a new context
2152 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2153 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2154 cpassert(info == 0)
2155 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2156 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2157 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2158
2159 ALLOCATE (newdat(nrow_global, max(1, in)))
2160
2161 ! do the actual scalapack to cols reordering
2162 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2163 fm%matrix_struct%descriptor, &
2164 newdat, 1, 1, desc, ictxt_loc%get_handle())
2165
2166 ALLOCATE (vecbuf(nrow_global*max_block))
2167 vecbuf = huge(1.0_dp) ! init for valgrind
2168
2169 DO i = 1, ncol_global, max(max_block, 1)
2170 i_block = min(max_block, ncol_global - i + 1)
2171 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2172 irow_local, icol_local, iprow, ipcol)
2173 IF (ipcol == mypcol) THEN
2174 DO j = 1, i_block
2175 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2176 END DO
2177 END IF
2178
2179 IF (ipcol == 0) THEN
2180 ! do nothing
2181 ELSE
2182 IF (ipcol == mypcol) THEN
2183 CALL para_env%send(vecbuf(:), 0, tag)
2184 END IF
2185 IF (mypcol == 0) THEN
2186 CALL para_env%recv(vecbuf(:), ipcol, tag)
2187 END IF
2188 END IF
2189
2190 IF (unit > 0) THEN
2191 DO j = 1, i_block
2192 WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2193 END DO
2194 END IF
2195
2196 END DO
2197 END associate
2198 DEALLOCATE (vecbuf)
2199
2200 CALL ictxt_loc%gridexit()
2201
2202 DEALLOCATE (newdat)
2203
2204#else
2205
2206 IF (unit > 0) THEN
2207 DO j = 1, ncol_global
2208 WRITE (unit) fm%local_data(:, j)
2209 END DO
2210 END IF
2211
2212#endif
2213 CALL timestop(handle)
2214
2215 END SUBROUTINE cp_fm_write_unformatted
2216
2217! **************************************************************************************************
2218!> \brief Write out a full matrix in plain text.
2219!> \param fm the matrix to be outputted
2220!> \param unit the unit number for I/O
2221!> \param header optional header
2222!> \param value_format ...
2223! **************************************************************************************************
2224 SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2225 TYPE(cp_fm_type), INTENT(IN) :: fm
2226 INTEGER, INTENT(IN) :: unit
2227 CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2228
2229 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_write_formatted'
2230
2231 CHARACTER(LEN=21) :: my_value_format
2232 INTEGER :: handle, i, j, max_block, &
2233 ncol_global, nrow_global
2234 TYPE(mp_para_env_type), POINTER :: para_env
2235#if defined(__parallel)
2236 INTEGER :: i_block, icol_local, &
2237 in, info, ipcol, &
2238 iprow, irow_local, &
2239 mepos, num_pe, rb, tag, k, &
2240 icol, irow
2241 INTEGER, DIMENSION(9) :: desc
2242 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2243 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2244 TYPE(cp_blacs_type) :: ictxt_loc
2245 INTEGER, EXTERNAL :: numroc
2246#endif
2247
2248 CALL timeset(routinen, handle)
2249 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2250 para_env=para_env)
2251
2252 IF (PRESENT(value_format)) THEN
2253 cpassert(len_trim(adjustl(value_format)) < 11)
2254 my_value_format = "(I10, I10, "//trim(adjustl(value_format))//")"
2255 ELSE
2256 my_value_format = "(I10, I10, ES24.12)"
2257 END IF
2258
2259 IF (unit > 0) THEN
2260 IF (PRESENT(header)) WRITE (unit, *) header
2261 WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", adjustl("Value")
2262 END IF
2263
2264#if defined(__parallel)
2265 num_pe = para_env%num_pe
2266 mepos = para_env%mepos
2267 rb = nrow_global
2268 tag = 0
2269 ! get a new context
2270 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2271 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2272 cpassert(info == 0)
2273 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2274 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2275 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2276
2277 ALLOCATE (newdat(nrow_global, max(1, in)))
2278
2279 ! do the actual scalapack to cols reordering
2280 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2281 fm%matrix_struct%descriptor, &
2282 newdat, 1, 1, desc, ictxt_loc%get_handle())
2283
2284 ALLOCATE (vecbuf(nrow_global*max_block))
2285 vecbuf = huge(1.0_dp) ! init for valgrind
2286 irow = 1
2287 icol = 1
2288
2289 DO i = 1, ncol_global, max(max_block, 1)
2290 i_block = min(max_block, ncol_global - i + 1)
2291 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2292 irow_local, icol_local, iprow, ipcol)
2293 IF (ipcol == mypcol) THEN
2294 DO j = 1, i_block
2295 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2296 END DO
2297 END IF
2298
2299 IF (ipcol == 0) THEN
2300 ! do nothing
2301 ELSE
2302 IF (ipcol == mypcol) THEN
2303 CALL para_env%send(vecbuf(:), 0, tag)
2304 END IF
2305 IF (mypcol == 0) THEN
2306 CALL para_env%recv(vecbuf(:), ipcol, tag)
2307 END IF
2308 END IF
2309
2310 IF (unit > 0) THEN
2311 DO j = 1, i_block
2312 DO k = (j - 1)*nrow_global + 1, nrow_global*j
2313 WRITE (unit=unit, fmt=my_value_format) irow, icol, vecbuf(k)
2314 irow = irow + 1
2315 IF (irow > nrow_global) THEN
2316 irow = 1
2317 icol = icol + 1
2318 END IF
2319 END DO
2320 END DO
2321 END IF
2322
2323 END DO
2324 END associate
2325 DEALLOCATE (vecbuf)
2326
2327 CALL ictxt_loc%gridexit()
2328
2329 DEALLOCATE (newdat)
2330
2331#else
2332
2333 IF (unit > 0) THEN
2334 DO j = 1, ncol_global
2335 DO i = 1, nrow_global
2336 WRITE (unit=unit, fmt=my_value_format) i, j, fm%local_data(i, j)
2337 END DO
2338 END DO
2339 END IF
2340
2341#endif
2342 CALL timestop(handle)
2343
2344 END SUBROUTINE cp_fm_write_formatted
2345
2346! **************************************************************************************************
2347!> \brief ...
2348!> \param fm ...
2349!> \param unit ...
2350! **************************************************************************************************
2351 SUBROUTINE cp_fm_read_unformatted(fm, unit)
2352 TYPE(cp_fm_type), INTENT(INOUT) :: fm
2353 INTEGER, INTENT(IN) :: unit
2354
2355 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_read_unformatted'
2356
2357 INTEGER :: handle, j, max_block, &
2358 ncol_global, nrow_global
2359 TYPE(mp_para_env_type), POINTER :: para_env
2360#if defined(__parallel)
2361 INTEGER :: k, n_cols
2362 REAL(kind=dp), DIMENSION(:, :), POINTER :: vecbuf
2363#endif
2364
2365 CALL timeset(routinen, handle)
2366
2367 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2368 para_env=para_env)
2369
2370#if defined(__parallel)
2371
2372 ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2373
2374 ALLOCATE (vecbuf(nrow_global, max_block))
2375
2376 DO j = 1, ncol_global, max_block
2377
2378 n_cols = min(max_block, ncol_global - j + 1)
2379 IF (para_env%mepos == 0) THEN
2380 DO k = 1, n_cols
2381 READ (unit) vecbuf(:, k)
2382 END DO
2383 END IF
2384 CALL para_env%bcast(vecbuf, 0)
2385 CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2386
2387 END DO
2388
2389 DEALLOCATE (vecbuf)
2390
2391#else
2392
2393 DO j = 1, ncol_global
2394 READ (unit) fm%local_data(:, j)
2395 END DO
2396
2397#endif
2398
2399 CALL timestop(handle)
2400
2401 END SUBROUTINE cp_fm_read_unformatted
2402
2403! **************************************************************************************************
2404!> \brief ...
2405!> \param mm_type ...
2406! **************************************************************************************************
2407 SUBROUTINE cp_fm_setup(mm_type)
2408 INTEGER, INTENT(IN) :: mm_type
2409
2410 cp_fm_mm_type = mm_type
2411 END SUBROUTINE cp_fm_setup
2412
2413! **************************************************************************************************
2414!> \brief ...
2415!> \return ...
2416! **************************************************************************************************
2417 FUNCTION cp_fm_get_mm_type() RESULT(res)
2418 INTEGER :: res
2419
2420 res = cp_fm_mm_type
2421 END FUNCTION cp_fm_get_mm_type
2422
2423! **************************************************************************************************
2424!> \brief ...
2425!> \param ictxt ...
2426!> \param prec ...
2427!> \return ...
2428! **************************************************************************************************
2429 FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2430 INTEGER :: ictxt
2431 CHARACTER(LEN=1) :: prec
2432 INTEGER :: res
2433#if defined(__parallel)
2434 INTEGER :: pilaenv
2435 res = pilaenv(ictxt, prec)
2436#else
2437 mark_used(ictxt)
2438 mark_used(prec)
2439 res = -1
2440#endif
2441
2442 END FUNCTION cp_fm_pilaenv
2443
2444END MODULE cp_fm_types
methods related to the blacs parallel environment
wrappers for the actual blacs calls. all functionality needed in the code should actually be provide ...
represent the structure of a full matrix
subroutine, public cp_fm_struct_create(fmstruct, para_env, context, nrow_global, ncol_global, nrow_block, ncol_block, descriptor, first_p_pos, local_leading_dimension, template_fmstruct, square_blocks, force_block)
allocates and initializes a full matrix structure
subroutine, public cp_fm_struct_get(fmstruct, para_env, context, descriptor, ncol_block, nrow_block, nrow_global, ncol_global, first_p_pos, row_indices, col_indices, nrow_local, ncol_local, nrow_locals, ncol_locals, local_leading_dimension)
returns the values of various attributes of the matrix structure
logical function, public cp_fm_struct_equivalent(fmstruct1, fmstruct2)
returns true if the two matrix structures are equivalent, false otherwise.
subroutine, public cp_fm_struct_retain(fmstruct)
retains a full matrix structure
subroutine, public cp_fm_struct_write_info(fmstruct, io_unit)
Write nicely formatted info about the FM struct to the given I/O unit.
subroutine, public cp_fm_struct_release(fmstruct)
releases a full matrix structure
represent a full matrix distributed on many processors
Definition cp_fm_types.F:15
subroutine, public cp_fm_copy_general(source, destination, para_env)
General copy of a fm matrix to another fm matrix. Uses non-blocking MPI rather than ScaLAPACK.
subroutine, public cp_fm_get_diag(matrix, diag)
returns the diagonal elements of a fm
subroutine, public cp_fm_start_copy_general(source, destination, para_env, info)
Initiates the copy operation: get distribution data, post MPI isend and irecvs.
subroutine, public cp_fm_cleanup_copy_general(info)
Completes the copy operation: wait for comms clean up MPI state.
subroutine, public cp_fm_get_info(matrix, name, nrow_global, ncol_global, nrow_block, ncol_block, nrow_local, ncol_local, row_indices, col_indices, local_data, context, nrow_locals, ncol_locals, matrix_struct, para_env)
returns all kind of information about the full matrix
integer function, public cp_fm_get_mm_type()
...
subroutine, public cp_fm_vectorssum(matrix, sum_array, dir)
summing up all the elements along the matrix's i-th index or
subroutine, public cp_fm_write_unformatted(fm, unit)
...
subroutine, public cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
returns an element of a fm this value is valid on every cpu using this call is expensive
subroutine, public cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
...
subroutine, public cp_fm_set_all_submatrix(fm, new_value, start_row, start_col, n_rows, n_cols)
sets a submatrix of a full matrix to a given value fm(start_row:start_row+n_rows,start_col:start_col+...
subroutine, public cp_fm_setup(mm_type)
...
subroutine, public cp_fm_set_submatrix(fm, new_values, start_row, start_col, n_rows, n_cols, alpha, beta, transpose)
sets a submatrix of a full matrix fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = alpha*o...
subroutine, public cp_fm_read_unformatted(fm, unit)
...
subroutine, public cp_fm_vectorsnorm(matrix, norm_array)
find the inorm of each column norm_{j}= sqrt( \sum_{i} A_{ij}*A_{ij} )
subroutine, public cp_fm_write_info(matrix, io_unit)
Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
subroutine, public cp_fm_maxabsrownorm(matrix, a_max)
find the maximum over the rows of the sum of the absolute values of the elements of a given row = || ...
subroutine, public cp_fm_to_fm_submat_general(source, destination, nrows, ncols, s_firstrow, s_firstcol, d_firstrow, d_firstcol, global_context)
General copy of a submatrix of fm matrix to a submatrix of another fm matrix. The two matrices can ha...
subroutine, public cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
copy just a part ot the matrix
subroutine, public cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
find the maximum absolute value of the matrix element maxval(abs(matrix))
subroutine, public cp_fm_set_all(matrix, alpha, beta)
set all elements of a matrix to the same value, and optionally the diagonal to a different one
subroutine, public cp_fm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
creates a new full matrix with the given structure
subroutine, public cp_fm_finish_copy_general(destination, info)
Completes the copy operation: wait for comms, unpack, clean up MPI state.
subroutine, public cp_fm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
gets a submatrix of a full matrix op(target_m)(1:n_rows,1:n_cols) =fm(start_row:start_row+n_rows,...
subroutine, public cp_fm_set_element(matrix, irow_global, icol_global, alpha)
sets an element of a matrix
subroutine, public cp_fm_init_random(matrix, ncol, start_col)
fills a matrix with random numbers
subroutine, public cp_fm_to_fm_triangular(msource, mtarget, uplo)
copy just a triangular matrix
integer function, public cp_fm_pilaenv(ictxt, prec)
...
subroutine, public cp_fm_write_formatted(fm, unit, header, value_format)
Write out a full matrix in plain text.
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
Interface to the message passing library MPI.
integer, parameter, public mp_proc_null
logical, parameter, public cp2k_is_parallel
integer, parameter, public mp_any_source
type(mp_request_type), parameter, public mp_request_null
Parallel (pseudo)random number generator (RNG) for multiple streams and substreams of random numbers.
integer, parameter, public uniform
represent a blacs multidimensional parallel environment (for the mpi corrispective see cp_paratypes/m...
keeps the information about the structure of a full matrix
Stores the state of a copy between cp_fm_start_copy_general and cp_fm_finish_copy_general.
just to build arrays of pointers to matrices
represent a full matrix
stores all the informations relevant to an mpi environment