cp_fm_types.F
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief represent a full matrix distributed on many processors
10!> \par History
11!> 3) separated structure object, removed globenv, renamed to full matrix
12!> many changes (fawzi 08.2002)
13!> \author Matthias Krack (22.05.2001)
14! **************************************************************************************************
25 USE kinds, ONLY: dp,&
26 sp
34 USE parallel_rng_types, ONLY: uniform,&
36#include "../base/base_uses.f90"
37
38 IMPLICIT NONE
39
40 PRIVATE
41
42 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_types'
43 LOGICAL, PARAMETER :: debug_this_module = .true.
44 INTEGER, PARAMETER :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11
45
46 INTEGER, PRIVATE :: cp_fm_mm_type = 1
47
48 PUBLIC :: cp_fm_type, &
50
51 PUBLIC :: cp_fm_add_to_element, &
57 cp_fm_get_diag, & ! get diagonal
58 cp_fm_set_all, & ! set all elements and diagonal
59 cp_fm_set_all_submatrix, & ! set a submatrix to a given value
60 cp_fm_set_submatrix, & ! set a submatrix to given values
61 cp_fm_get_submatrix, & ! get a submatrix of given values
63 cp_fm_maxabsval, & ! find the maximum absolute value
64 cp_fm_maxabsrownorm, & ! find the maximum of the sum of the abs of the elements of a row
65 cp_fm_to_fm, & ! copy (parts of) a fm to a fm
66 cp_fm_vectorsnorm, & ! compute the norm of the column-vectors
67 cp_fm_vectorssum, & ! compute the sum of all elements of the column-vectors
68 cp_fm_to_fm_submat, & ! copy (parts of) a fm to a fm
74 cp_fm_write_unformatted, & ! writes a full matrix to an open unit
75 cp_fm_write_formatted, & ! writes a full matrix to an open unit
76 cp_fm_read_unformatted, & ! reads a full matrix from an open unit
77 cp_fm_setup, & ! allows setting flags for fms
80 cp_fm_to_fm_submat_general ! copy matrix across different contexts
81
82 PUBLIC :: cp_fm_pilaenv
83
84 INTERFACE cp_fm_to_fm
85 MODULE PROCEDURE cp_fm_to_fm_matrix, & ! a full matrix
86 cp_fm_to_fm_columns ! just a number of columns
87 END INTERFACE
88
89 INTERFACE cp_fm_release
90 MODULE PROCEDURE cp_fm_release_aa0, &
91 cp_fm_release_aa1, &
92 cp_fm_release_aa2, &
93 cp_fm_release_aa3, &
94 cp_fm_release_ap1, &
95 cp_fm_release_ap2, &
96 cp_fm_release_pa1, &
97 cp_fm_release_pa2, &
98 cp_fm_release_pa3, &
99 cp_fm_release_pp1, &
100 cp_fm_release_pp2
101 END INTERFACE
102
103! **************************************************************************************************
104!> \brief represent a full matrix
105!> \param name the name of the matrix, used for printing
106!> \param matrix_struct structure of this matrix
107!> \param local_data array with the data of the matrix (its contents
108!> depend on the matrix type used: in parallel runs it will be
109!> in scalapack format, in sequential, it will simply contain
110!> the matrix)
111!> \par History
112!> 08.2002 created [fawzi]
113!> \author fawzi
114! **************************************************************************************************
115 TYPE cp_fm_type
116! PRIVATE
117 CHARACTER(LEN=60) :: name = ""
118 LOGICAL :: use_sp = .false.
119 TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
120 REAL(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
121 REAL(kind=sp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data_sp => null()
122 END TYPE cp_fm_type
123
124! **************************************************************************************************
125!> \brief just to build arrays of pointers to matrices
126!> \param matrix the pointer to the matrix
127!> \par History
128!> 08.2002 created [fawzi]
129!> \author fawzi
130! **************************************************************************************************
131 TYPE cp_fm_p_type
132 TYPE(cp_fm_type), POINTER :: matrix => null()
133 END TYPE cp_fm_p_type
134
135! **************************************************************************************************
136!> \brief Stores the state of a copy between cp_fm_start_copy_general
137!> and cp_fm_finish_copy_general
138!> \par History
139!> Jan 2017 [Mark T]
140! **************************************************************************************************
141 TYPE copy_info_type
142 INTEGER :: send_size = -1
143 INTEGER, DIMENSION(2) :: nlocal_recv = -1, nblock_src = -1, src_num_pe = -1 ! 1->row 2->col
144 TYPE(mp_request_type), DIMENSION(:), ALLOCATABLE :: send_request, recv_request
145 INTEGER, DIMENSION(:), ALLOCATABLE :: recv_disp
146 INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
147 INTEGER, DIMENSION(:, :), ALLOCATABLE :: src_blacs2mpi
148 REAL(kind=dp), DIMENSION(:), ALLOCATABLE :: recv_buf, send_buf
149 END TYPE copy_info_type
150
151CONTAINS
152
153! **************************************************************************************************
154!> \brief creates a new full matrix with the given structure
155!> \param matrix the matrix to be created
156!> \param matrix_struct the structure of matrix
157!> \param name the name of the matrix, used for printing (defaults to 'full matrix')
158!> \param use_sp if true, allocate single precision local data (defaults to .FALSE.)
159!> \param nrow if present, overrides the global number of rows taken from matrix_struct
160!> \param ncol if present, overrides the global number of columns taken from matrix_struct
161!> \param set_zero if true, the newly allocated local data is zeroed
162!> \par History
163!> 08.2002 created [fawzi]
164!> \author Fawzi Mohamed
165!> \note
166!> preferred allocation routine
167! **************************************************************************************************
168 SUBROUTINE cp_fm_create(matrix, matrix_struct, name, use_sp, nrow, ncol, set_zero)
169 TYPE(cp_fm_type), INTENT(OUT) :: matrix
170 TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
171 CHARACTER(LEN=*), INTENT(in), OPTIONAL :: name
172 LOGICAL, INTENT(in), OPTIONAL :: use_sp
173 INTEGER, INTENT(IN), OPTIONAL :: nrow, ncol
174 LOGICAL, INTENT(in), OPTIONAL :: set_zero
175
176 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_create'
177
178 INTEGER :: handle, ncol_global, ncol_local, &
179 nrow_global, nrow_local
180 TYPE(cp_blacs_env_type), POINTER :: context
181 TYPE(cp_fm_struct_type), POINTER :: fm_struct
182
183 CALL timeset(routinen, handle)
184
185 IF (PRESENT(nrow) .OR. PRESENT(ncol)) THEN
186 CALL cp_fm_struct_get(matrix_struct, nrow_global=nrow_global, ncol_global=ncol_global)
187 IF (PRESENT(nrow)) nrow_global = nrow
188 IF (PRESENT(ncol)) ncol_global = ncol
189 CALL cp_fm_struct_create(fm_struct, template_fmstruct=matrix_struct, &
190 nrow_global=nrow_global, ncol_global=ncol_global)
191
192 context => fm_struct%context
193 matrix%matrix_struct => fm_struct
194 CALL cp_fm_struct_retain(matrix%matrix_struct)
195
196 nrow_local = fm_struct%local_leading_dimension
197 ncol_local = max(1, fm_struct%ncol_locals(context%mepos(2)))
198
199 CALL cp_fm_struct_release(fm_struct)
200 ELSE
201
202 context => matrix_struct%context
203 matrix%matrix_struct => matrix_struct
204 CALL cp_fm_struct_retain(matrix%matrix_struct)
205
206 ! OK, we allocate here at least a 1 x 1 matrix
207 ! this must (and is) compatible with the descinit call
208 ! in cp_fm_struct
209 nrow_local = matrix_struct%local_leading_dimension
210 ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
211 END IF
212
213 matrix%use_sp = .false.
214 IF (PRESENT(use_sp)) matrix%use_sp = use_sp
215
216 NULLIFY (matrix%local_data)
217 NULLIFY (matrix%local_data_sp)
218
219 IF (matrix%use_sp) THEN
220 ALLOCATE (matrix%local_data_sp(nrow_local, ncol_local))
221 ELSE
222 ALLOCATE (matrix%local_data(nrow_local, ncol_local))
223 END IF
224
225 IF (PRESENT(set_zero)) THEN
226 IF (set_zero) THEN
227 IF (matrix%use_sp) THEN
228 matrix%local_data_sp(1:nrow_local, 1:ncol_local) = 0.0_sp
229 ELSE
230 matrix%local_data(1:nrow_local, 1:ncol_local) = 0.0_dp
231 END IF
232 END IF
233 END IF
234
235 IF (PRESENT(name)) THEN
236 matrix%name = name
237 ELSE
238 matrix%name = 'full matrix'
239 END IF
240
241 CALL timestop(handle)
242
243 END SUBROUTINE cp_fm_create
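! Example usage (editorial sketch, not part of the original file): create a work
! matrix on an existing, retained cp_fm_struct_type and release it again. The names
! "work" and "fm_struct" are placeholders for suitably declared variables.
!
!    TYPE(cp_fm_type) :: work
!    CALL cp_fm_create(work, fm_struct, name="work matrix", set_zero=.TRUE.)
!    ! ... operate on work via the cp_fm_* routines or work%local_data ...
!    CALL cp_fm_release(work)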
244
245! **************************************************************************************************
246!> \brief releases a full matrix
247!> \param matrix the matrix to release
248!> \par History
249!> 08.2002 created [fawzi]
250!> \author Fawzi Mohamed
251! **************************************************************************************************
252 SUBROUTINE cp_fm_release_aa0(matrix)
253 TYPE(cp_fm_type), INTENT(INOUT) :: matrix
254
255 IF (ASSOCIATED(matrix%local_data)) THEN
256 DEALLOCATE (matrix%local_data)
257 NULLIFY (matrix%local_data)
258 END IF
259 IF (ASSOCIATED(matrix%local_data_sp)) THEN
260 DEALLOCATE (matrix%local_data_sp)
261 NULLIFY (matrix%local_data_sp)
262 END IF
263 matrix%name = ""
264 CALL cp_fm_struct_release(matrix%matrix_struct)
265
266 END SUBROUTINE cp_fm_release_aa0
267
268! **************************************************************************************************
269!> \brief ...
270!> \param matrices ...
271! **************************************************************************************************
272 SUBROUTINE cp_fm_release_aa1(matrices)
273 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:) :: matrices
274
275 INTEGER :: i
276
277 IF (ALLOCATED(matrices)) THEN
278 DO i = 1, SIZE(matrices)
279 CALL cp_fm_release(matrices(i))
280 END DO
281 DEALLOCATE (matrices)
282 END IF
283 END SUBROUTINE cp_fm_release_aa1
284
285! **************************************************************************************************
286!> \brief ...
287!> \param matrices ...
288! **************************************************************************************************
289 SUBROUTINE cp_fm_release_aa2(matrices)
290 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
291
292 INTEGER :: i, j
293
294 IF (ALLOCATED(matrices)) THEN
295 DO i = 1, SIZE(matrices, 1)
296 DO j = 1, SIZE(matrices, 2)
297 CALL cp_fm_release(matrices(i, j))
298 END DO
299 END DO
300 DEALLOCATE (matrices)
301 END IF
302 END SUBROUTINE cp_fm_release_aa2
303
304! **************************************************************************************************
305!> \brief ...
306!> \param matrices ...
307! **************************************************************************************************
308 SUBROUTINE cp_fm_release_aa3(matrices)
309 TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:, :, :) :: matrices
310
311 INTEGER :: i, j, k
312
313 IF (ALLOCATED(matrices)) THEN
314 DO i = 1, SIZE(matrices, 1)
315 DO j = 1, SIZE(matrices, 2)
316 DO k = 1, SIZE(matrices, 3)
317 CALL cp_fm_release(matrices(i, j, k))
318 END DO
319 END DO
320 END DO
321 DEALLOCATE (matrices)
322 END IF
323 END SUBROUTINE cp_fm_release_aa3
324
325! **************************************************************************************************
326!> \brief ...
327!> \param matrices ...
328! **************************************************************************************************
329 SUBROUTINE cp_fm_release_pa1(matrices)
330 TYPE(cp_fm_type), DIMENSION(:), POINTER :: matrices
331
332 INTEGER :: i
333
334 IF (ASSOCIATED(matrices)) THEN
335 DO i = 1, SIZE(matrices)
336 CALL cp_fm_release(matrices(i))
337 END DO
338 DEALLOCATE (matrices)
339 NULLIFY (matrices)
340 END IF
341 END SUBROUTINE cp_fm_release_pa1
342
343! **************************************************************************************************
344!> \brief ...
345!> \param matrices ...
346! **************************************************************************************************
347 SUBROUTINE cp_fm_release_pa2(matrices)
348 TYPE(cp_fm_type), DIMENSION(:, :), POINTER :: matrices
349
350 INTEGER :: i, j
351
352 IF (ASSOCIATED(matrices)) THEN
353 DO i = 1, SIZE(matrices, 1)
354 DO j = 1, SIZE(matrices, 2)
355 CALL cp_fm_release(matrices(i, j))
356 END DO
357 END DO
358 DEALLOCATE (matrices)
359 NULLIFY (matrices)
360 END IF
361 END SUBROUTINE cp_fm_release_pa2
362
363! **************************************************************************************************
364!> \brief ...
365!> \param matrices ...
366! **************************************************************************************************
367 SUBROUTINE cp_fm_release_pa3(matrices)
368 TYPE(cp_fm_type), DIMENSION(:, :, :), POINTER :: matrices
369
370 INTEGER :: i, j, k
371
372 IF (ASSOCIATED(matrices)) THEN
373 DO i = 1, SIZE(matrices, 1)
374 DO j = 1, SIZE(matrices, 2)
375 DO k = 1, SIZE(matrices, 3)
376 CALL cp_fm_release(matrices(i, j, k))
377 END DO
378 END DO
379 END DO
380 DEALLOCATE (matrices)
381 NULLIFY (matrices)
382 END IF
383 END SUBROUTINE cp_fm_release_pa3
384
385! **************************************************************************************************
386!> \brief ...
387!> \param matrices ...
388! **************************************************************************************************
389 SUBROUTINE cp_fm_release_ap1(matrices)
390 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:) :: matrices
391
392 INTEGER :: i
393
394 IF (ALLOCATED(matrices)) THEN
395 DO i = 1, SIZE(matrices)
396 CALL cp_fm_release(matrices(i)%matrix)
397 DEALLOCATE (matrices(i)%matrix)
398 END DO
399 DEALLOCATE (matrices)
400 END IF
401 END SUBROUTINE cp_fm_release_ap1
402
403! **************************************************************************************************
404!> \brief ...
405!> \param matrices ...
406! **************************************************************************************************
407 SUBROUTINE cp_fm_release_ap2(matrices)
408 TYPE(cp_fm_p_type), ALLOCATABLE, DIMENSION(:, :) :: matrices
409
410 INTEGER :: i, j
411
412 IF (ALLOCATED(matrices)) THEN
413 DO i = 1, SIZE(matrices, 1)
414 DO j = 1, SIZE(matrices, 2)
415 CALL cp_fm_release(matrices(i, j)%matrix)
416 DEALLOCATE (matrices(i, j)%matrix)
417 END DO
418 END DO
419 DEALLOCATE (matrices)
420 END IF
421 END SUBROUTINE cp_fm_release_ap2
422
423! **************************************************************************************************
424!> \brief ...
425!> \param matrices ...
426! **************************************************************************************************
427 SUBROUTINE cp_fm_release_pp1(matrices)
428 TYPE(cp_fm_p_type), DIMENSION(:), POINTER :: matrices
429
430 INTEGER :: i
431
432 IF (ASSOCIATED(matrices)) THEN
433 DO i = 1, SIZE(matrices)
434 CALL cp_fm_release(matrices(i)%matrix)
435 DEALLOCATE (matrices(i)%matrix)
436 END DO
437 DEALLOCATE (matrices)
438 NULLIFY (matrices)
439 END IF
440 END SUBROUTINE cp_fm_release_pp1
441
442! **************************************************************************************************
443!> \brief ...
444!> \param matrices ...
445! **************************************************************************************************
446 SUBROUTINE cp_fm_release_pp2(matrices)
447 TYPE(cp_fm_p_type), DIMENSION(:, :), POINTER :: matrices
448
449 INTEGER :: i, j
450
451 IF (ASSOCIATED(matrices)) THEN
452 DO i = 1, SIZE(matrices, 1)
453 DO j = 1, SIZE(matrices, 2)
454 CALL cp_fm_release(matrices(i, j)%matrix)
455 DEALLOCATE (matrices(i, j)%matrix)
456 END DO
457 END DO
458 DEALLOCATE (matrices)
459 NULLIFY (matrices)
460 END IF
461 END SUBROUTINE cp_fm_release_pp2
462
463! **************************************************************************************************
464!> \brief fills a matrix with random numbers
465!> \param matrix : to be initialized
466!> \param ncol : number of cols to fill
467!> \param start_col : starting col number
468!> \author Joost VandeVondele
469!> \note
470!> the value of a_ij is independent of the number of cpus
471! **************************************************************************************************
472 SUBROUTINE cp_fm_init_random(matrix, ncol, start_col)
473 TYPE(cp_fm_type), INTENT(IN) :: matrix
474 INTEGER, INTENT(IN), OPTIONAL :: ncol, start_col
475
476 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_init_random'
477
478 INTEGER :: handle, icol_global, icol_local, irow_local, my_ncol, my_start_col, ncol_global, &
479 ncol_local, nrow_global, nrow_local
480 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
481 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: buff
482 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
483 POINTER :: local_data
484 REAL(kind=dp), DIMENSION(3, 2), SAVE :: &
485 seed = reshape([1.0_dp, 2.0_dp, 3.0_dp, 4.0_dp, 5.0_dp, 6.0_dp], [3, 2])
486 TYPE(rng_stream_type) :: rng
487
488 CALL timeset(routinen, handle)
489
490 ! guarantee same seed over all tasks
491 CALL matrix%matrix_struct%para_env%bcast(seed, 0)
492
493 rng = rng_stream_type("cp_fm_init_random_stream", distribution_type=uniform, &
494 extended_precision=.true., seed=seed)
495
496 cpassert(.NOT. matrix%use_sp)
497
498 CALL cp_fm_get_info(matrix, nrow_global=nrow_global, ncol_global=ncol_global, &
499 nrow_local=nrow_local, ncol_local=ncol_local, &
500 local_data=local_data, &
501 row_indices=row_indices, col_indices=col_indices)
502
503 my_start_col = 1
504 IF (PRESENT(start_col)) my_start_col = start_col
505 my_ncol = matrix%matrix_struct%ncol_global
506 IF (PRESENT(ncol)) my_ncol = ncol
507
508 IF (ncol_global < (my_start_col + my_ncol - 1)) &
509 cpabort("ncol_global>=(my_start_col+my_ncol-1)")
510
511 ALLOCATE (buff(nrow_global))
512
513 ! each global row has its own substream, in order to reach the stream for the local col,
514 ! we just reset to the next substream
515 ! following this, we fill the full buff with random numbers, and pick those we need
516 icol_global = 0
517 DO icol_local = 1, ncol_local
518 cpassert(col_indices(icol_local) > icol_global)
519 DO
520 CALL rng%reset_to_next_substream()
521 icol_global = icol_global + 1
522 IF (icol_global == col_indices(icol_local)) EXIT
523 END DO
524 CALL rng%fill(buff)
525 DO irow_local = 1, nrow_local
526 local_data(irow_local, icol_local) = buff(row_indices(irow_local))
527 END DO
528 END DO
529
530 DEALLOCATE (buff)
531
532 ! store seed before deletion (unclear if this is the proper seed)
533
534 ! Note that, the initial state (rng%ig) instead of the current state (rng%cg) is stored in the
535 ! seed variable. As a consequence, each invocation of cp_fm_init_random uses exactly the same
536 ! stream of random numbers. While this seems odd and contrary to the original design,
537 ! it was probably introduced to improve reproducibility.
538 ! See also https://github.com/cp2k/cp2k/pull/506
539 CALL rng%get(ig=seed)
540
541 CALL timestop(handle)
542
543 END SUBROUTINE cp_fm_init_random
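! Usage sketch (assuming "mos" is an existing double precision cp_fm_type with at
! least 10 columns): fill its first 10 columns with reproducible random numbers.
!
!    CALL cp_fm_init_random(mos, ncol=10, start_col=1)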
544
545! **************************************************************************************************
546!> \brief set all elements of a matrix to the same value,
547!> and optionally the diagonal to a different one
548!> \param matrix input matrix
549!> \param alpha scalar used to set all elements of the matrix
550!> \param beta scalar used to set diagonal of the matrix
551!> \note
552!> can be used to zero a matrix
553!> can be used to create a unit matrix (I-matrix) alpha=0.0_dp beta=1.0_dp
554! **************************************************************************************************
555 SUBROUTINE cp_fm_set_all(matrix, alpha, beta)
556
557 TYPE(cp_fm_type), INTENT(IN) :: matrix
558 REAL(kind=dp), INTENT(IN) :: alpha
559 REAL(kind=dp), INTENT(IN), OPTIONAL :: beta
560
561 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_set_all'
562
563 INTEGER :: handle, i, n
564
565 CALL timeset(routinen, handle)
566
567 IF (matrix%use_sp) THEN
568 matrix%local_data_sp(:, :) = real(alpha, sp)
569 ELSE
570 matrix%local_data(:, :) = alpha
571 END IF
572
573 IF (PRESENT(beta)) THEN
574 cpassert(.NOT. matrix%use_sp)
575 n = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)
576 DO i = 1, n
577 CALL cp_fm_set_element(matrix, i, i, beta)
578 END DO
579 END IF
580
581 CALL timestop(handle)
582
583 END SUBROUTINE cp_fm_set_all
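! Usage sketch (assuming "matrix" is a square, double precision cp_fm_type): turn it
! into the unit matrix by zeroing all elements and setting the diagonal to one.
!
!    CALL cp_fm_set_all(matrix, alpha=0.0_dp, beta=1.0_dp)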
584
585! **************************************************************************************************
586!> \brief returns the diagonal elements of a fm
587!> \param matrix the matrix to read
588!> \param diag replicated vector that receives the diagonal elements (valid on all cpus)
589! **************************************************************************************************
590 SUBROUTINE cp_fm_get_diag(matrix, diag)
591
592 ! arguments
593 TYPE(cp_fm_type), INTENT(IN) :: matrix
594 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: diag
595
596 ! locals
597 INTEGER :: i, nrow_global
598
599#if defined(__parallel)
600 INTEGER, DIMENSION(9) :: desca
601 TYPE(cp_blacs_env_type), POINTER :: context
602 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
603 nprow
604 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
605 REAL(kind=sp), DIMENSION(:, :), POINTER :: a_sp
606#endif
607
608 CALL cp_fm_get_info(matrix, nrow_global=nrow_global)
609
610#if defined(__parallel)
611 diag = 0.0_dp
612 context => matrix%matrix_struct%context
613 myprow = context%mepos(1)
614 mypcol = context%mepos(2)
615 nprow = context%num_pe(1)
616 npcol = context%num_pe(2)
617
618 a => matrix%local_data
619 a_sp => matrix%local_data_sp
620 desca(:) = matrix%matrix_struct%descriptor(:)
621
622 DO i = 1, nrow_global
623 CALL infog2l(i, i, desca, nprow, npcol, myprow, mypcol, &
624 irow_local, icol_local, iprow, ipcol)
625 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
626 IF (matrix%use_sp) THEN
627 diag(i) = real(a_sp(irow_local, icol_local), dp)
628 ELSE
629 diag(i) = a(irow_local, icol_local)
630 END IF
631 END IF
632 END DO
633#else
634 DO i = 1, nrow_global
635 IF (matrix%use_sp) THEN
636 diag(i) = real(matrix%local_data_sp(i, i), dp)
637 ELSE
638 diag(i) = matrix%local_data(i, i)
639 END IF
640 END DO
641#endif
642 CALL matrix%matrix_struct%para_env%sum(diag)
643
644 END SUBROUTINE cp_fm_get_diag
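! Usage sketch (assuming "matrix" is an n x n cp_fm_type, with "n" known on all
! ranks): fetch the replicated diagonal into a local buffer.
!
!    REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: diag
!    ALLOCATE (diag(n))
!    CALL cp_fm_get_diag(matrix, diag)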
645
646! **************************************************************************************************
647!> \brief returns an element of a fm
648!> this value is valid on every cpu
649!> using this call is expensive
650!> \param matrix the matrix to read
651!> \param irow_global the row
652!> \param icol_global the col
653!> \param alpha the value of matrix(irow_global, icol_global)
654!> \param local true if the element is on this cpu, false otherwise
655!> \note
656!> - modified semantics: this function now always returns the value;
657!> previously the value was zero on cpus that didn't own the relevant
658!> part of the matrix (Joost VandeVondele, May 2003)
659!> - usage of this function should be avoided, as it is likely to be rather slow;
660!> using row_indices/col_indices/local_data + some smart scheme normally
661!> yields truly parallel code
662! **************************************************************************************************
663 SUBROUTINE cp_fm_get_element(matrix, irow_global, icol_global, alpha, local)
664
665 ! arguments
666 TYPE(cp_fm_type), INTENT(IN) :: matrix
667 REAL(kind=dp), INTENT(OUT) :: alpha
668 INTEGER, INTENT(IN) :: icol_global, &
669 irow_global
670 LOGICAL, INTENT(OUT), OPTIONAL :: local
671
672 ! locals
673#if defined(__parallel)
674 INTEGER, DIMENSION(9) :: desca
675 TYPE(cp_blacs_env_type), POINTER :: context
676 INTEGER :: icol_local, ipcol, iprow, irow_local, mypcol, myprow, npcol, &
677 nprow
678 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
679#endif
680
681#if defined(__parallel)
682 context => matrix%matrix_struct%context
683 myprow = context%mepos(1)
684 mypcol = context%mepos(2)
685 nprow = context%num_pe(1)
686 npcol = context%num_pe(2)
687
688 a => matrix%local_data
689 desca(:) = matrix%matrix_struct%descriptor(:)
690
691 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
692 irow_local, icol_local, iprow, ipcol)
693
694 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
695 alpha = a(irow_local, icol_local)
696 CALL context%dgebs2d('All', ' ', 1, 1, alpha, 1)
697 IF (PRESENT(local)) local = .true.
698 ELSE
699 CALL context%dgebr2d('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
700 IF (PRESENT(local)) local = .false.
701 END IF
702
703#else
704 IF (PRESENT(local)) local = .true.
705 alpha = matrix%local_data(irow_global, icol_global)
706#endif
707
708 END SUBROUTINE cp_fm_get_element
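! Usage sketch: read one element, replicated to every rank (expensive; all ranks of
! the BLACS context must make the same call). "val" and "is_local" are placeholders.
!
!    REAL(kind=dp) :: val
!    LOGICAL :: is_local
!    CALL cp_fm_get_element(matrix, 1, 1, val, local=is_local)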
709
710! **************************************************************************************************
711!> \brief sets an element of a matrix
712!> \param matrix the matrix to modify
713!> \param irow_global the global row index
714!> \param icol_global the global column index
715!> \param alpha the value to store at (irow_global, icol_global)
716!> \note
717!> we expect all cpus to have the same arguments in the call to this function
718!> (otherwise one should use local_data tricks)
719! **************************************************************************************************
720 SUBROUTINE cp_fm_set_element(matrix, irow_global, icol_global, alpha)
721 TYPE(cp_fm_type), INTENT(IN) :: matrix
722 INTEGER, INTENT(IN) :: irow_global, icol_global
723 REAL(kind=dp), INTENT(IN) :: alpha
724
725 INTEGER :: mypcol, myprow, npcol, nprow
726 TYPE(cp_blacs_env_type), POINTER :: context
727#if defined(__parallel)
728 INTEGER :: icol_local, ipcol, iprow, &
729 irow_local
730 INTEGER, DIMENSION(9) :: desca
731 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
732#endif
733
734 context => matrix%matrix_struct%context
735 myprow = context%mepos(1)
736 mypcol = context%mepos(2)
737 nprow = context%num_pe(1)
738 npcol = context%num_pe(2)
739
740 cpassert(.NOT. matrix%use_sp)
741
742#if defined(__parallel)
743
744 a => matrix%local_data
745
746 desca(:) = matrix%matrix_struct%descriptor(:)
747
748 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
749 irow_local, icol_local, iprow, ipcol)
750
751 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
752 a(irow_local, icol_local) = alpha
753 END IF
754
755#else
756
757 matrix%local_data(irow_global, icol_global) = alpha
758
759#endif
760 END SUBROUTINE cp_fm_set_element
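! Usage sketch: all ranks call with identical arguments; only the rank owning the
! global entry (2, 3) actually stores the value.
!
!    CALL cp_fm_set_element(matrix, irow_global=2, icol_global=3, alpha=1.0_dp)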
761
762! **************************************************************************************************
763!> \brief sets a submatrix of a full matrix
764!> fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
765!> = alpha*op(new_values)(1:n_rows,1:n_cols)+ beta
766!> * fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
767!> \param fm the full matrix to change
768!> \param new_values a replicated full matrix with the new values
769!> \param start_row the starting row in fm (defaults to 1)
770!> \param start_col the starting col in fm (defaults to 1)
771!> \param n_rows the number of rows to change in fm (defaults to
772!> size(op(new_values),1))
773!> \param n_cols the number of columns to change in fm (defaults to
774!> size(op(new_values),2))
775!> \param alpha rescaling factor for the new values (defaults to 1.0)
776!> \param beta rescaling factor for the old values (defaults to 0.0)
777!> \param transpose if new_values should be transposed: if true
778!> op(new_values)=new_values^T, else op(new_values)=new_values
779!> (defaults to false)
780!> \par History
781!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
782!> \author Fawzi Mohamed
783!> \note
784!> optimized for full column updates and alpha=1.0, beta=0.0
785!> the new_values need to be valid on all cpus
786! **************************************************************************************************
787 SUBROUTINE cp_fm_set_submatrix(fm, new_values, start_row, &
788 start_col, n_rows, n_cols, alpha, beta, transpose)
789 TYPE(cp_fm_type), INTENT(IN) :: fm
790 REAL(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
791 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
792 REAL(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
793 LOGICAL, INTENT(in), OPTIONAL :: transpose
794
795 INTEGER :: i, i0, j, j0, ncol, ncol_block, &
796 ncol_global, ncol_local, nrow, &
797 nrow_block, nrow_global, nrow_local, &
798 this_col, this_row
799 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
800 LOGICAL :: tr_a
801 REAL(kind=dp) :: al, be
802 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
803
804 al = 1.0_dp; be = 0.0_dp; i0 = 1; j0 = 1; tr_a = .false.
805 ! timing disabled: this routine can be called very often, which would render the timings useless
806 ! CALL timeset(routineN//','//moduleN,handle)
807
808 cpassert(.NOT. fm%use_sp)
809
810 IF (PRESENT(alpha)) al = alpha
811 IF (PRESENT(beta)) be = beta
812 IF (PRESENT(start_row)) i0 = start_row
813 IF (PRESENT(start_col)) j0 = start_col
814 IF (PRESENT(transpose)) tr_a = transpose
815 IF (tr_a) THEN
816 nrow = SIZE(new_values, 2)
817 ncol = SIZE(new_values, 1)
818 ELSE
819 nrow = SIZE(new_values, 1)
820 ncol = SIZE(new_values, 2)
821 END IF
822 IF (PRESENT(n_rows)) nrow = n_rows
823 IF (PRESENT(n_cols)) ncol = n_cols
824
825 full_block => fm%local_data
826
827 CALL cp_fm_get_info(matrix=fm, &
828 nrow_global=nrow_global, ncol_global=ncol_global, &
829 nrow_block=nrow_block, ncol_block=ncol_block, &
830 nrow_local=nrow_local, ncol_local=ncol_local, &
831 row_indices=row_indices, col_indices=col_indices)
832
833 IF (al == 1.0 .AND. be == 0.0) THEN
834 DO j = 1, ncol_local
835 this_col = col_indices(j) - j0 + 1
836 IF (this_col >= 1 .AND. this_col <= ncol) THEN
837 IF (tr_a) THEN
838 IF (i0 == 1 .AND. nrow_global == nrow) THEN
839 DO i = 1, nrow_local
840 full_block(i, j) = new_values(this_col, row_indices(i))
841 END DO
842 ELSE
843 DO i = 1, nrow_local
844 this_row = row_indices(i) - i0 + 1
845 IF (this_row >= 1 .AND. this_row <= nrow) THEN
846 full_block(i, j) = new_values(this_col, this_row)
847 END IF
848 END DO
849 END IF
850 ELSE
851 IF (i0 == 1 .AND. nrow_global == nrow) THEN
852 DO i = 1, nrow_local
853 full_block(i, j) = new_values(row_indices(i), this_col)
854 END DO
855 ELSE
856 DO i = 1, nrow_local
857 this_row = row_indices(i) - i0 + 1
858 IF (this_row >= 1 .AND. this_row <= nrow) THEN
859 full_block(i, j) = new_values(this_row, this_col)
860 END IF
861 END DO
862 END IF
863 END IF
864 END IF
865 END DO
866 ELSE
867 DO j = 1, ncol_local
868 this_col = col_indices(j) - j0 + 1
869 IF (this_col >= 1 .AND. this_col <= ncol) THEN
870 IF (tr_a) THEN
871 DO i = 1, nrow_local
872 this_row = row_indices(i) - i0 + 1
873 IF (this_row >= 1 .AND. this_row <= nrow) THEN
874 full_block(i, j) = al*new_values(this_col, this_row) + &
875 be*full_block(i, j)
876 END IF
877 END DO
878 ELSE
879 DO i = 1, nrow_local
880 this_row = row_indices(i) - i0 + 1
881 IF (this_row >= 1 .AND. this_row <= nrow) THEN
882 full_block(i, j) = al*new_values(this_row, this_col) + &
883 be*full_block(i, j)
884 END IF
885 END DO
886 END IF
887 END IF
888 END DO
889 END IF
890
891 ! CALL timestop(handle)
892
893 END SUBROUTINE cp_fm_set_submatrix
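! Usage sketch (assuming "block" is a replicated REAL(dp) array holding the same
! values on every rank): write it into fm starting at global position (5, 5).
!
!    CALL cp_fm_set_submatrix(fm, block, start_row=5, start_col=5)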
894
895! **************************************************************************************************
896!> \brief sets a submatrix of a full matrix to a given value
897!> fm(start_row:start_row+n_rows,start_col:start_col+n_cols) = value
898!> \param fm the full matrix to change
899!> \param new_value the value assigned to every element of the submatrix
900!> \param start_row the starting row of matrix
901!> \param start_col the starting col of matrix
902!> \param n_rows the number of rows to change
903!> \param n_cols the number of columns to change
904!> \par History
905!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
906!> 12.2025 created from cp_fm_set_submatrix
907!> \author JGH
908! **************************************************************************************************
909 SUBROUTINE cp_fm_set_all_submatrix(fm, new_value, start_row, start_col, n_rows, n_cols)
910 TYPE(cp_fm_type), INTENT(IN) :: fm
911 REAL(kind=dp), INTENT(in) :: new_value
912 INTEGER, INTENT(in) :: start_row, start_col, n_rows, n_cols
913
914 INTEGER :: i, i0, j, j0, ncol_global, ncol_local, &
915 nrow_global, nrow_local, this_col, &
916 this_row
917 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
918 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
919
920 cpassert(.NOT. fm%use_sp)
921
922 full_block => fm%local_data
923
924 CALL cp_fm_get_info(matrix=fm, &
925 nrow_global=nrow_global, ncol_global=ncol_global, &
926 nrow_local=nrow_local, ncol_local=ncol_local, &
927 row_indices=row_indices, col_indices=col_indices)
928
929 i0 = start_row
930 j0 = start_col
931 DO j = 1, ncol_local
932 this_col = col_indices(j) - j0 + 1
933 IF (this_col >= 1 .AND. this_col <= n_cols) THEN
934 DO i = 1, nrow_local
935 this_row = row_indices(i) - i0 + 1
936 IF (this_row >= 1 .AND. this_row <= n_rows) THEN
937 full_block(i, j) = new_value
938 END IF
939 END DO
940 END IF
941 END DO
942
943 END SUBROUTINE cp_fm_set_all_submatrix
944
945! **************************************************************************************************
946!> \brief gets a submatrix of a full matrix
947!> op(target_m)(1:n_rows,1:n_cols)
948!> =fm(start_row:start_row+n_rows,start_col:start_col+n_cols)
949!> target_m is replicated on all cpus
950!> using this call is expensive
951!> \param fm the full matrix you want to get the data from
952!> \param target_m a replicated full matrix that will contain the result
953!> \param start_row the starting row in fm (defaults to 1)
954!> \param start_col the starting col in fm (defaults to 1)
955!> \param n_rows the number of rows to copy (defaults to
956!> size(op(target_m),1))
957!> \param n_cols the number of columns to copy (defaults to
958!> size(op(target_m),2))
959!> \param transpose if target_m should be transposed: if true
960!> op(target_m)=target_m^T, else op(target_m)=target_m
961!> (defaults to false)
962!> \par History
963!> 07.2002 created borrowing from Joost's blacs_replicated_copy [fawzi]
964!> \author Fawzi Mohamed
965!> \note
966!> optimized for full column updates. Zeros out a little too much
967!> of target_m
968!> the target_m is replicated and valid on all cpus
969! **************************************************************************************************
970 SUBROUTINE cp_fm_get_submatrix(fm, target_m, start_row, &
971 start_col, n_rows, n_cols, transpose)
972 TYPE(cp_fm_type), INTENT(IN) :: fm
973 REAL(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
974 INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
975 LOGICAL, INTENT(in), OPTIONAL :: transpose
976
977 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_get_submatrix'
978
979 INTEGER :: handle, i, i0, j, j0, ncol, ncol_global, &
980 ncol_local, nrow, nrow_global, &
981 nrow_local, this_col, this_row
982 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
983 LOGICAL :: tr_a
984 REAL(kind=dp), DIMENSION(:, :), POINTER :: full_block
985 TYPE(mp_para_env_type), POINTER :: para_env
986
987 CALL timeset(routinen, handle)
988
989 i0 = 1; j0 = 1; tr_a = .false.
990
991 cpassert(.NOT. fm%use_sp)
992
993 IF (PRESENT(start_row)) i0 = start_row
994 IF (PRESENT(start_col)) j0 = start_col
995 IF (PRESENT(transpose)) tr_a = transpose
996 IF (tr_a) THEN
997 nrow = SIZE(target_m, 2)
998 ncol = SIZE(target_m, 1)
999 ELSE
1000 nrow = SIZE(target_m, 1)
1001 ncol = SIZE(target_m, 2)
1002 END IF
1003 IF (PRESENT(n_rows)) nrow = n_rows
1004 IF (PRESENT(n_cols)) ncol = n_cols
1005
1006 para_env => fm%matrix_struct%para_env
1007
1008 full_block => fm%local_data
1009#if defined(__parallel)
1010 ! zero-out whole target_m
1011 IF (SIZE(target_m, 1)*SIZE(target_m, 2) /= 0) THEN
1012 CALL dcopy(SIZE(target_m, 1)*SIZE(target_m, 2), [0.0_dp], 0, target_m, 1)
1013 END IF
1014#endif
1015
1016 CALL cp_fm_get_info(matrix=fm, &
1017 nrow_global=nrow_global, ncol_global=ncol_global, &
1018 nrow_local=nrow_local, ncol_local=ncol_local, &
1019 row_indices=row_indices, col_indices=col_indices)
1020
1021 DO j = 1, ncol_local
1022 this_col = col_indices(j) - j0 + 1
1023 IF (this_col >= 1 .AND. this_col <= ncol) THEN
1024 IF (tr_a) THEN
1025 IF (i0 == 1 .AND. nrow_global == nrow) THEN
1026 DO i = 1, nrow_local
1027 target_m(this_col, row_indices(i)) = full_block(i, j)
1028 END DO
1029 ELSE
1030 DO i = 1, nrow_local
1031 this_row = row_indices(i) - i0 + 1
1032 IF (this_row >= 1 .AND. this_row <= nrow) THEN
1033 target_m(this_col, this_row) = full_block(i, j)
1034 END IF
1035 END DO
1036 END IF
1037 ELSE
1038 IF (i0 == 1 .AND. nrow_global == nrow) THEN
1039 DO i = 1, nrow_local
1040 target_m(row_indices(i), this_col) = full_block(i, j)
1041 END DO
1042 ELSE
1043 DO i = 1, nrow_local
1044 this_row = row_indices(i) - i0 + 1
1045 IF (this_row >= 1 .AND. this_row <= nrow) THEN
1046 target_m(this_row, this_col) = full_block(i, j)
1047 END IF
1048 END DO
1049 END IF
1050 END IF
1051 END IF
1052 END DO
1053
1054 CALL para_env%sum(target_m)
1055
1056 CALL timestop(handle)
1057
1058 END SUBROUTINE cp_fm_get_submatrix
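! Usage sketch: gather the 4 x 4 block starting at global position (1, 1) into a
! replicated array that is valid on all ranks after the call.
!
!    REAL(kind=dp), DIMENSION(4, 4) :: block
!    CALL cp_fm_get_submatrix(fm, block, start_row=1, start_col=1, n_rows=4, n_cols=4)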
1059
1060! **************************************************************************************************
1061!> \brief returns all kinds of information about the full matrix
1062!> \param matrix ...
1063!> \param name ...
1064!> \param nrow_global ...
1065!> \param ncol_global ...
1066!> \param nrow_block ...
1067!> \param ncol_block ...
1068!> \param nrow_local ...
1069!> \param ncol_local ...
1070!> \param row_indices ...
1071!> \param col_indices ...
1072!> \param local_data ...
1073!> \param context ...
1074!> \param nrow_locals ...
1075!> \param ncol_locals ...
1076!> \param matrix_struct ...
1077!> \param para_env ...
1078!> \note
1079!> see also cp_fm_struct for explanation
1080!> - nrow_local, ncol_local, row_indices, col_indices, local_data are hooks for efficient
1081!> access to the local blacs block
1082! **************************************************************************************************
1083 SUBROUTINE cp_fm_get_info(matrix, name, nrow_global, ncol_global, &
1084 nrow_block, ncol_block, nrow_local, ncol_local, &
1085 row_indices, col_indices, local_data, context, &
1086 nrow_locals, ncol_locals, matrix_struct, para_env)
1087
1088 TYPE(cp_fm_type), INTENT(IN) :: matrix
1089 CHARACTER(LEN=*), INTENT(OUT), OPTIONAL :: name
1090 INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
1091 ncol_block, nrow_local, ncol_local
1092 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
1093 REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
1094 OPTIONAL, POINTER :: local_data
1095 TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
1096 INTEGER, DIMENSION(:), OPTIONAL, POINTER :: nrow_locals, ncol_locals
1097 TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
1098 TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env
1099
1100 IF (PRESENT(name)) name = matrix%name
1101 IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
1102 IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(
1103
1104 CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
1105 ncol_local=ncol_local, nrow_global=nrow_global, &
1106 ncol_global=ncol_global, nrow_block=nrow_block, &
1107 ncol_block=ncol_block, row_indices=row_indices, &
1108 col_indices=col_indices, nrow_locals=nrow_locals, &
1109 ncol_locals=ncol_locals, context=context, para_env=para_env)
1110
1111 END SUBROUTINE cp_fm_get_info
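! Usage sketch: query the local BLACS block for a hand-written parallel loop over
! the locally stored elements. The variable names are placeholders.
!
!    INTEGER :: nrl, ncl
!    INTEGER, DIMENSION(:), POINTER :: ri, ci
!    REAL(kind=dp), CONTIGUOUS, DIMENSION(:, :), POINTER :: ld
!    CALL cp_fm_get_info(matrix, nrow_local=nrl, ncol_local=ncl, &
!                        row_indices=ri, col_indices=ci, local_data=ld)
!    ! ld(i, j) then holds the global element (ri(i), ci(j))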
1112
1113! **************************************************************************************************
1114!> \brief Write nicely formatted info about the FM to the given I/O unit (including the underlying FM struct)
1115!> \param matrix a cp_fm_type instance
1116!> \param io_unit the I/O unit to use for writing
1117! **************************************************************************************************
1118 SUBROUTINE cp_fm_write_info(matrix, io_unit)
1119 TYPE(cp_fm_type), INTENT(IN) :: matrix
1120 INTEGER, INTENT(IN) :: io_unit
1121
1122 WRITE (io_unit, '(/,A,A12)') "CP_FM | Name: ", matrix%name
1123 CALL cp_fm_struct_write_info(matrix%matrix_struct, io_unit)
1124 END SUBROUTINE cp_fm_write_info
1125
1126! **************************************************************************************************
1127!> \brief find the maximum absolute value of the matrix elements,
1128!> maxval(abs(matrix))
1129!> \param matrix the matrix to scan
1130!> \param a_max the maximum absolute value
1131!> \param ir_max global row index of the maximum element (optional)
1132!> \param ic_max global column index of the maximum element (optional)
1133! **************************************************************************************************
1134 SUBROUTINE cp_fm_maxabsval(matrix, a_max, ir_max, ic_max)
1135 TYPE(cp_fm_type), INTENT(IN) :: matrix
1136 REAL(kind=dp), INTENT(OUT) :: a_max
1137 INTEGER, INTENT(OUT), OPTIONAL :: ir_max, ic_max
1138
1139 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_maxabsval'
1140
1141 INTEGER :: handle, i, ic_max_local, ir_max_local, &
1142 j, mepos, ncol_local, nrow_local, &
1143 num_pe
1144 INTEGER, ALLOCATABLE, DIMENSION(:) :: ic_max_vec, ir_max_vec
1145 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1146 REAL(dp) :: my_max
1147 REAL(dp), ALLOCATABLE, DIMENSION(:) :: a_max_vec
1148 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1149 REAL(kind=sp), DIMENSION(:, :), POINTER :: my_block_sp
1150
1151 CALL timeset(routinen, handle)
1152
1153 my_block => matrix%local_data
1154 my_block_sp => matrix%local_data_sp
1155
1156 CALL cp_fm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
1157 row_indices=row_indices, col_indices=col_indices)
1158
1159 IF (matrix%use_sp) THEN
1160 a_max = real(maxval(abs(my_block_sp(1:nrow_local, 1:ncol_local))), dp)
1161 ELSE
1162 a_max = maxval(abs(my_block(1:nrow_local, 1:ncol_local)))
1163 END IF
1164
1165 IF (PRESENT(ir_max)) THEN
1166 num_pe = matrix%matrix_struct%para_env%num_pe
1167 mepos = matrix%matrix_struct%para_env%mepos
1168 ALLOCATE (ir_max_vec(0:num_pe - 1))
1169 ir_max_vec(0:num_pe - 1) = 0
1170 ALLOCATE (ic_max_vec(0:num_pe - 1))
1171 ic_max_vec(0:num_pe - 1) = 0
1172 ALLOCATE (a_max_vec(0:num_pe - 1))
1173 a_max_vec(0:num_pe - 1) = 0.0_dp
1174 my_max = 0.0_dp
1175
1176 IF ((ncol_local > 0) .AND. (nrow_local > 0)) THEN
1177 DO i = 1, ncol_local
1178 DO j = 1, nrow_local
1179 IF (matrix%use_sp) THEN
1180 IF (abs(my_block_sp(j, i)) > my_max) THEN
1181 my_max = real(abs(my_block_sp(j, i)), dp) ! compare consistently against the absolute value
1182 ir_max_local = j
1183 ic_max_local = i
1184 END IF
1185 ELSE
1186 IF (abs(my_block(j, i)) > my_max) THEN
1187 my_max = abs(my_block(j, i)) ! compare consistently against the absolute value
1188 ir_max_local = j
1189 ic_max_local = i
1190 END IF
1191 END IF
1192 END DO
1193 END DO
1194
1195 a_max_vec(mepos) = my_max
1196 ir_max_vec(mepos) = row_indices(ir_max_local)
1197 ic_max_vec(mepos) = col_indices(ic_max_local)
1198
1199 END IF
1200
1201 CALL matrix%matrix_struct%para_env%sum(a_max_vec)
1202 CALL matrix%matrix_struct%para_env%sum(ir_max_vec)
1203 CALL matrix%matrix_struct%para_env%sum(ic_max_vec)
1204
1205 my_max = 0.0_dp
1206 DO i = 0, num_pe - 1
1207 IF (a_max_vec(i) > my_max) THEN
my_max = a_max_vec(i) ! remember the current winner so the true maximum is kept
1208 ir_max = ir_max_vec(i)
1209 ic_max = ic_max_vec(i)
1210 END IF
1211 END DO
1212
1213 DEALLOCATE (ir_max_vec, ic_max_vec, a_max_vec)
1214 cpassert(ic_max > 0)
1215 cpassert(ir_max > 0)
1216
1217 END IF
1218
1219 CALL matrix%matrix_struct%para_env%max(a_max)
1220
1221 CALL timestop(handle)
1222
1223 END SUBROUTINE cp_fm_maxabsval
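! Usage sketch: locate the largest absolute entry and its global indices.
!
!    REAL(kind=dp) :: a_max
!    INTEGER :: ir, ic
!    CALL cp_fm_maxabsval(matrix, a_max, ir_max=ir, ic_max=ic)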
1224
1225! **************************************************************************************************
1226!> \brief find the maximum over the rows of the sum of the absolute values of the elements of a given row
1227!> = || A ||_infinity
1228!> \param matrix ...
1229!> \param a_max ...
1230!> \note
1231!> for a real symmetric matrix it holds that || A ||_2 = |lambda_max| <= || A ||_infinity
1232!> Hence this can be used to estimate an upper bound for the eigenvalues of a matrix
1233!> http://mathworld.wolfram.com/MatrixNorm.html
1234!> (but the bound is not so tight in the general case)
1235! **************************************************************************************************
1236 SUBROUTINE cp_fm_maxabsrownorm(matrix, a_max)
1237 TYPE(cp_fm_type), INTENT(IN) :: matrix
1238 REAL(kind=dp), INTENT(OUT) :: a_max
1239
1240 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_maxabsrownorm'
1241
1242 INTEGER :: handle, i, j, ncol_local, nrow_global, &
1243 nrow_local
1244 INTEGER, DIMENSION(:), POINTER :: row_indices
1245 REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: values
1246 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1247
1248 CALL timeset(routinen, handle)
1249
1250 my_block => matrix%local_data
1251
1252 cpassert(.NOT. matrix%use_sp)
1253
1254 CALL cp_fm_get_info(matrix, row_indices=row_indices, nrow_global=nrow_global, &
1255 nrow_local=nrow_local, ncol_local=ncol_local)
1256
1257 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1258 ALLOCATE (values(nrow_global))
1259 values = 0.0_dp
1260 DO j = 1, ncol_local
1261 DO i = 1, nrow_local
1262 values(row_indices(i)) = values(row_indices(i)) + abs(my_block(i, j))
1263 END DO
1264 END DO
1265 CALL matrix%matrix_struct%para_env%sum(values)
1266 a_max = maxval(values)
1267 DEALLOCATE (values)
1268
1269 CALL timestop(handle)
1270 END SUBROUTINE cp_fm_maxabsrownorm
1271
1272! **************************************************************************************************
1273!> \brief find the 2-norm of each column: norm_{j} = sqrt( \sum_{i} A_{ij}*A_{ij} )
1274!> \param matrix the input matrix
1275!> \param norm_array array of length ncol_global that receives the column norms
1276! **************************************************************************************************
1277 SUBROUTINE cp_fm_vectorsnorm(matrix, norm_array)
1278 TYPE(cp_fm_type), INTENT(IN) :: matrix
1279 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: norm_array
1280
1281 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_vectorsnorm'
1282
1283 INTEGER :: handle, i, j, ncol_global, ncol_local, &
1284 nrow_local
1285 INTEGER, DIMENSION(:), POINTER :: col_indices
1286 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1287
1288 CALL timeset(routinen, handle)
1289
1290 my_block => matrix%local_data
1291
1292 cpassert(.NOT. matrix%use_sp)
1293
1294 CALL cp_fm_get_info(matrix, col_indices=col_indices, ncol_global=ncol_global, &
1295 nrow_local=nrow_local, ncol_local=ncol_local)
1296
1297 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1298 norm_array = 0.0_dp
1299 DO j = 1, ncol_local
1300 DO i = 1, nrow_local
1301 norm_array(col_indices(j)) = norm_array(col_indices(j)) + my_block(i, j)*my_block(i, j)
1302 END DO
1303 END DO
1304 CALL matrix%matrix_struct%para_env%sum(norm_array)
1305 norm_array = sqrt(norm_array)
1306
1307 CALL timestop(handle)
1308 END SUBROUTINE cp_fm_vectorsnorm
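! Usage sketch (assuming "norms" is a REAL(dp) array with ncol_global entries, e.g.
! to check the normalization of MO coefficient vectors):
!
!    CALL cp_fm_vectorsnorm(matrix, norms)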
1309
1310! **************************************************************************************************
1311!> \brief summing up all the elements along the matrix's i-th index
1312!> \f$ \mathrm{sum}_{j} = \sum_{i} A_{ij} \f$
1313!> or
1314!> \f$ \mathrm{sum}_{i} = \sum_{j} A_{ij} \f$
1315!> \param matrix an input matrix A
1316!> \param sum_array sums of elements in each column/row
1317!> \param dir 'C' (default) sums over the rows of each column, 'R' sums over the columns of each row
1318!> \note forked from cp_fm_vectorsnorm() to be used with
1319!> the maximum overlap method
1320!> added row variation
1321! **************************************************************************************************
1322 SUBROUTINE cp_fm_vectorssum(matrix, sum_array, dir)
1323 TYPE(cp_fm_type), INTENT(IN) :: matrix
1324 REAL(kind=dp), DIMENSION(:), INTENT(OUT) :: sum_array
1325 CHARACTER(LEN=1), INTENT(IN), OPTIONAL :: dir
1326
1327 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_vectorssum'
1328
1329 INTEGER :: handle, i, j, ncol_local, nrow_local
1330 INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1331 LOGICAL :: docol
1332 REAL(kind=dp), DIMENSION(:, :), POINTER :: my_block
1333
1334 CALL timeset(routinen, handle)
1335
1336 IF (PRESENT(dir)) THEN
1337 IF (dir == 'c' .OR. dir == 'C') THEN
1338 docol = .true.
1339 ELSEIF (dir == 'r' .OR. dir == 'R') THEN
1340 docol = .false.
1341 ELSE
1342 cpabort('Wrong argument DIR')
1343 END IF
1344 ELSE
1345 docol = .true.
1346 END IF
1347
1348 my_block => matrix%local_data
1349
1350 cpassert(.NOT. matrix%use_sp)
1351
1352 CALL cp_fm_get_info(matrix, col_indices=col_indices, row_indices=row_indices, &
1353 nrow_local=nrow_local, ncol_local=ncol_local)
1354
1355 ! the efficiency could be improved by making use of the row-col distribution of scalapack
1356 sum_array(:) = 0.0_dp
1357 IF (docol) THEN
1358 DO j = 1, ncol_local
1359 DO i = 1, nrow_local
1360 sum_array(col_indices(j)) = sum_array(col_indices(j)) + my_block(i, j)
1361 END DO
1362 END DO
1363 ELSE
1364 DO j = 1, ncol_local
1365 DO i = 1, nrow_local
1366 sum_array(row_indices(i)) = sum_array(row_indices(i)) + my_block(i, j)
1367 END DO
1368 END DO
1369 END IF
1370 CALL matrix%matrix_struct%para_env%sum(sum_array)
1371
1372 CALL timestop(handle)
1373 END SUBROUTINE cp_fm_vectorssum
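! Usage sketch ("colsums" and "rowsums" are placeholder arrays with ncol_global and
! nrow_global entries, respectively):
!
!    CALL cp_fm_vectorssum(matrix, colsums)           ! colsums(j) = sum_i A_ij
!    CALL cp_fm_vectorssum(matrix, rowsums, dir='R')  ! rowsums(i) = sum_j A_ij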
1374
1375! **************************************************************************************************
1376!> \brief copy one identically sized matrix into the other
1377!> \param source ...
1378!> \param destination ...
1379!> \note
1380!> see also cp_fm_to_fm_columns
1381! **************************************************************************************************
1382 SUBROUTINE cp_fm_to_fm_matrix(source, destination)
1383
1384 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1385
1386 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_matrix'
1387
1388 INTEGER :: handle, npcol, nprow
1389
1390 CALL timeset(routinen, handle)
1391
1392 nprow = source%matrix_struct%context%num_pe(1)
1393 npcol = source%matrix_struct%context%num_pe(2)
1394
1395 IF ((.NOT. cp2k_is_parallel) .OR. &
1396 cp_fm_struct_equivalent(source%matrix_struct, &
1397 destination%matrix_struct)) THEN
1398 IF (source%use_sp .AND. destination%use_sp) THEN
1399 IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1400 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data_sp, 2)) &
1401 CALL cp_abort(__location__, &
1402 "Cannot copy full matrix <"//trim(source%name)// &
1403 "> to full matrix <"//trim(destination%name)// &
1404 ">. The local_data blocks have different sizes.")
1405 CALL scopy(SIZE(source%local_data_sp, 1)*SIZE(source%local_data_sp, 2), &
1406 source%local_data_sp, 1, destination%local_data_sp, 1)
1407 ELSE IF (source%use_sp .AND. .NOT. destination%use_sp) THEN
1408 IF (SIZE(source%local_data_sp, 1) /= SIZE(destination%local_data, 1) .OR. &
1409 SIZE(source%local_data_sp, 2) /= SIZE(destination%local_data, 2)) &
1410 CALL cp_abort(__location__, &
1411 "Cannot copy full matrix <"//trim(source%name)// &
1412 "> to full matrix <"//trim(destination%name)// &
1413 ">. The local_data blocks have different sizes.")
1414 destination%local_data = real(source%local_data_sp, dp)
1415 ELSE IF (.NOT. source%use_sp .AND. destination%use_sp) THEN
1416 IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data_sp, 1) .OR. &
1417 SIZE(source%local_data, 2) /= SIZE(destination%local_data_sp, 2)) &
1418 CALL cp_abort(__location__, &
1419 "Cannot copy full matrix <"//trim(source%name)// &
1420 "> to full matrix <"//trim(destination%name)// &
1421 ">. The local_data blocks have different sizes.")
1422 destination%local_data_sp = real(source%local_data, sp)
1423 ELSE
1424 IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
1425 SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
1426 CALL cp_abort(__location__, &
1427 "Cannot copy full matrix <"//trim(source%name)// &
1428 "> to full matrix <"//trim(destination%name)// &
1429 ">. The local_data blocks have different sizes.")
1430 CALL dcopy(SIZE(source%local_data, 1)*SIZE(source%local_data, 2), &
1431 source%local_data, 1, destination%local_data, 1)
1432 END IF
1433 ELSE
1434 cpabort("Data structures of source and target full matrix are not equivalent")
1435 END IF
1436
1437 CALL timestop(handle)
1438
1439 END SUBROUTINE cp_fm_to_fm_matrix
1440
1441! **************************************************************************************************
1442!> \brief copy just a subset of columns of a fm to a fm
1443!> \param msource the source matrix
1444!> \param mtarget the target matrix
1445!> \param ncol number of columns to copy
1446!> \param source_start first column to copy from msource (defaults to 1)
1447!> \param target_start first column to copy to in mtarget (defaults to 1)
1448! **************************************************************************************************
1449 SUBROUTINE cp_fm_to_fm_columns(msource, mtarget, ncol, source_start, &
1450 target_start)
1451
1452 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1453 INTEGER, INTENT(IN) :: ncol
1454 INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start
1455
1456 CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_fm_to_fm_columns'
1457
1458 INTEGER :: handle, n, ss, ts
1459 REAL(KIND=dp), DIMENSION(:, :), POINTER :: a, b
1460#if defined(__parallel)
1461 INTEGER :: i
1462 INTEGER, DIMENSION(9) :: desca, descb
1463#endif
1464
1465 CALL timeset(routinen, handle)
1466
1467 ss = 1
1468 ts = 1
1469
1470 IF (PRESENT(source_start)) ss = source_start
1471 IF (PRESENT(target_start)) ts = target_start
1472
1473 n = msource%matrix_struct%nrow_global
1474
1475 a => msource%local_data
1476 b => mtarget%local_data
1477
1478#if defined(__parallel)
1479 desca(:) = msource%matrix_struct%descriptor(:)
1480 descb(:) = mtarget%matrix_struct%descriptor(:)
1481 DO i = 0, ncol - 1
1482 CALL pdcopy(n, a, 1, ss + i, desca, 1, b, 1, ts + i, descb, 1)
1483 END DO
1484#else
1485 IF (ss <= SIZE(a, 2) .AND. ts <= SIZE(b, 2)) THEN
1486 CALL dcopy(ncol*n, a(:, ss), 1, b(:, ts), 1)
1487 END IF
1488#endif
1489
1490 CALL timestop(handle)
1491
1492 END SUBROUTINE cp_fm_to_fm_columns
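! Usage sketch: through the generic cp_fm_to_fm interface, copy either the whole
! matrix or a block of columns (here 5 columns, starting at column 3 of the source
! and column 1 of the target).
!
!    CALL cp_fm_to_fm(msource, mtarget)
!    CALL cp_fm_to_fm(msource, mtarget, ncol=5, source_start=3, target_start=1)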
1493
1494! **************************************************************************************************
1495!> \brief copy just the upper or lower triangular part of a matrix
1496!> \param msource the source matrix
1497!> \param mtarget the target matrix
1498!> \param uplo 'U' copies the upper triangle (default), 'L' the lower triangle
1499! **************************************************************************************************
1500 SUBROUTINE cp_fm_to_fm_triangular(msource, mtarget, uplo)
1501
1502 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1503 CHARACTER(LEN=1), OPTIONAL, INTENT(IN) :: uplo
1504
1505 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_triangular'
1506
1507 CHARACTER(LEN=1) :: myuplo
1508 INTEGER :: handle, ncol, nrow
1509 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1510#if defined(__parallel)
1511 INTEGER, DIMENSION(9) :: desca, descb
1512#endif
1513
1514 CALL timeset(routinen, handle)
1515
1516 myuplo = 'U'
1517 IF (PRESENT(uplo)) myuplo = uplo
1518
1519 nrow = msource%matrix_struct%nrow_global
1520 ncol = msource%matrix_struct%ncol_global
1521
1522 a => msource%local_data
1523 b => mtarget%local_data
1524
1525#if defined(__parallel)
1526 desca(:) = msource%matrix_struct%descriptor(:)
1527 descb(:) = mtarget%matrix_struct%descriptor(:)
1528 CALL pdlacpy(myuplo, nrow, ncol, a(1, 1), 1, 1, desca, b(1, 1), 1, 1, descb)
1529#else
1530 CALL dlacpy(myuplo, nrow, ncol, a(1, 1), nrow, b(1, 1), nrow)
1531#endif
1532
1533 CALL timestop(handle)
1534
1535 END SUBROUTINE cp_fm_to_fm_triangular
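! Usage sketch: copy only the lower triangle of msource into mtarget (the strictly
! upper triangle of mtarget is left untouched).
!
!    CALL cp_fm_to_fm_triangular(msource, mtarget, uplo='L')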
1536
1537! **************************************************************************************************
1538!> \brief copy just a part of the matrix
1539!> \param msource the source matrix
1540!> \param mtarget the target matrix
1541!> \param nrow number of rows to copy
1542!> \param ncol number of columns to copy
1543!> \param s_firstrow first row of the block in the source
1544!> \param s_firstcol first column of the block in the source
1545!> \param t_firstrow first row of the block in the target
1546!> \param t_firstcol first column of the block in the target
1547! **************************************************************************************************
1548
1549 SUBROUTINE cp_fm_to_fm_submat(msource, mtarget, nrow, ncol, s_firstrow, s_firstcol, t_firstrow, t_firstcol)
1550
1551 TYPE(cp_fm_type), INTENT(IN) :: msource, mtarget
1552 INTEGER, INTENT(IN) :: nrow, ncol, s_firstrow, &
1553 s_firstcol, t_firstrow, &
1554 t_firstcol
1555
1556 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat'
1557
1558 INTEGER :: handle, i, na, nb, ss, ts
1559 REAL(kind=dp), DIMENSION(:, :), POINTER :: a, b
1560#if defined(__parallel)
1561 INTEGER, DIMENSION(9) :: desca, descb
1562#endif
1563
1564 CALL timeset(routinen, handle)
1565
1566 a => msource%local_data
1567 b => mtarget%local_data
1568
1569 na = msource%matrix_struct%nrow_global
1570 nb = mtarget%matrix_struct%nrow_global
1571! nrow must be <= na and nb
1572 IF (nrow > na) &
1573 cpabort("cannot copy because nrow > number of rows of source matrix")
1574 IF (nrow > nb) &
1575 cpabort("cannot copy because nrow > number of rows of target matrix")
1576 na = msource%matrix_struct%ncol_global
1577 nb = mtarget%matrix_struct%ncol_global
1578! ncol must be <= na and nb
1579 IF (ncol > na) &
1580 cpabort("cannot copy because ncol > number of columns of source matrix")
1581 IF (ncol > nb) &
1582 cpabort("cannot copy because ncol > number of columns of target matrix")
1583
1584#if defined(__parallel)
1585 desca(:) = msource%matrix_struct%descriptor(:)
1586 descb(:) = mtarget%matrix_struct%descriptor(:)
1587 DO i = 0, ncol - 1
1588 ss = s_firstcol + i
1589 ts = t_firstcol + i
1590 CALL pdcopy(nrow, a, s_firstrow, ss, desca, 1, b, t_firstrow, ts, descb, 1)
1591 END DO
1592#else
1593 DO i = 0, ncol - 1
1594 ss = s_firstcol + i
1595 ts = t_firstcol + i
1596 CALL dcopy(nrow, a(s_firstrow:, ss), 1, b(t_firstrow:, ts), 1)
1597 END DO
1598#endif
1599
1600 CALL timestop(handle)
1601 END SUBROUTINE cp_fm_to_fm_submat
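! Editor's note - illustrative usage sketch, not part of the original source. Copies a 10x5
! block that starts at global element (1,1) of fm_src into fm_dst starting at global element
! (21,6); the matrix names are hypothetical and the block must fit inside both matrices.
!
!   CALL cp_fm_to_fm_submat(fm_src, fm_dst, nrow=10, ncol=5, &
!                           s_firstrow=1, s_firstcol=1, t_firstrow=21, t_firstcol=6)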
1602
1603! **************************************************************************************************
1604!> \brief General copy of a fm matrix to another fm matrix.
1605!> Uses non-blocking MPI rather than ScaLAPACK.
1606!>
1607!> \param source input fm matrix
1608!> \param destination output fm matrix
1609!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1610!> of the input and output matrices
1611!> \par History
1612!> 31-Jan-2017 : Re-implemented using non-blocking MPI [IainB, MarkT]
1613! **************************************************************************************************
1614 SUBROUTINE cp_fm_copy_general(source, destination, para_env)
1615 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1616 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1617
1618 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_copy_general'
1619
1620 INTEGER :: handle
1621 TYPE(copy_info_type) :: info
1622
1623 CALL timeset(routinen, handle)
1624
1625 CALL cp_fm_start_copy_general(source, destination, para_env, info)
1626 IF (ASSOCIATED(destination%matrix_struct)) THEN
1627 CALL cp_fm_finish_copy_general(destination, info)
1628 END IF
1629 IF (ASSOCIATED(source%matrix_struct)) THEN
1630 CALL cp_fm_cleanup_copy_general(info)
1631 END IF
1632
1633 CALL timestop(handle)
1634 END SUBROUTINE cp_fm_copy_general
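! Editor's note - illustrative usage sketch, not part of the original source. Redistributes a
! matrix between two different process grids in a single call; para_env_all is a hypothetical
! parallel environment spanning the processes of both source and destination.
!
!   CALL cp_fm_copy_general(fm_on_group_a, fm_on_group_b, para_env_all)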
1635
1636! **************************************************************************************************
1637!> \brief Initiates the copy operation: get distribution data, post MPI isends and irecvs
1638!> \param source input fm matrix
1639!> \param destination output fm matrix
1640!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
1641!> of the input and output matrices
1642!> \param info all of the data that will be needed to complete the copy operation
1643! **************************************************************************************************
1644 SUBROUTINE cp_fm_start_copy_general(source, destination, para_env, info)
1645 TYPE(cp_fm_type), INTENT(IN) :: source, destination
1646 TYPE(mp_para_env_type), INTENT(IN) :: para_env
1647 TYPE(copy_info_type), INTENT(OUT) :: info
1648
1649 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_start_copy_general'
1650
1651 INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
1652 ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
1653 nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
1654 recv_rank, recv_size, send_rank, send_size
1655 INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
1656 recv_count, send_count, send_disp, &
1657 source2global, src_p, src_q
1658 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
1659 INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
1660 src_block, src_block_tmp, src_num_pe
1661 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
1662 send_col_indices, send_row_indices
1663 TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
1664 TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req
1665
1666 CALL timeset(routinen, handle)
1667
1668 IF (.NOT. cp2k_is_parallel) THEN
1669 ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
1670 nrow_local_send = SIZE(source%local_data, 1)
1671 ncol_local_send = SIZE(source%local_data, 2)
1672 ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
1673 k = 0
1674 DO j = 1, ncol_local_send
1675 DO i = 1, nrow_local_send
1676 k = k + 1
1677 info%send_buf(k) = source%local_data(i, j)
1678 END DO
1679 END DO
1680 ELSE
1681 ! assumption of double precision data is carried forward from ScaLAPACK version
1682 IF (source%use_sp) cpabort("only DP kind implemented")
1683 IF (destination%use_sp) cpabort("only DP kind implemented")
1684
1685 NULLIFY (recv_dist, send_dist)
1686 NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)
1687
1688 ! The 'global' communicator contains both the source and destination decompositions
1689 global_size = para_env%num_pe
1690 global_rank = para_env%mepos
1691
1692 ! The source/send and destination/recv decompositions may only exist
1693 ! on a subset of the processes involved in the communication
1694 ! Check if the source and/or destination arguments are .not. ASSOCIATED():
1695 ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
1696 IF (ASSOCIATED(destination%matrix_struct)) THEN
1697 recv_dist => destination%matrix_struct
1698 recv_rank = recv_dist%para_env%mepos
1699 ELSE
1700 recv_rank = mp_proc_null
1701 END IF
1702
1703 IF (ASSOCIATED(source%matrix_struct)) THEN
1704 send_dist => source%matrix_struct
1705 send_rank = send_dist%para_env%mepos
1706 ELSE
1707 send_rank = mp_proc_null
1708 END IF
1709
1710 ! Map the rank in the source/dest communicator to the global rank
1711 ALLOCATE (all_ranks(0:global_size - 1))
1712
1713 CALL para_env%allgather(send_rank, all_ranks)
1714 IF (ASSOCIATED(recv_dist)) THEN
1715 ALLOCATE (source2global(0:count(all_ranks /= mp_proc_null) - 1))
1716 DO i = 0, global_size - 1
1717 IF (all_ranks(i) /= mp_proc_null) THEN
1718 source2global(all_ranks(i)) = i
1719 END IF
1720 END DO
1721 END IF
1722
1723 CALL para_env%allgather(recv_rank, all_ranks)
1724 IF (ASSOCIATED(send_dist)) THEN
1725 ALLOCATE (dest2global(0:count(all_ranks /= mp_proc_null) - 1))
1726 DO i = 0, global_size - 1
1727 IF (all_ranks(i) /= mp_proc_null) THEN
1728 dest2global(all_ranks(i)) = i
1729 END IF
1730 END DO
1731 END IF
1732 DEALLOCATE (all_ranks)
1733
1734 ! Some data from the two decompositions will be needed by all processes in the global group :
1735 ! process grid shape, block size, and the BLACS-to-MPI mapping
1736
1737 ! The global root process will receive the data (from the root process in each decomposition)
1738 send_req(:) = mp_request_null
1739 IF (global_rank == 0) THEN
1740 recv_req(:) = mp_request_null
1741 CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
1742 CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
1743 CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
1744 CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
1745 END IF
1746
1747 IF (ASSOCIATED(send_dist)) THEN
1748 IF ((send_rank == 0)) THEN
1749 ! need to use separate buffers here in case this is actually global rank 0
1750 src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
1751 CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
1752 CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
1753 END IF
1754 END IF
1755
1756 IF (ASSOCIATED(recv_dist)) THEN
1757 IF ((recv_rank == 0)) THEN
1758 dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
1759 CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
1760 CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
1761 END IF
1762 END IF
1763
1764 IF (global_rank == 0) THEN
1765 CALL mp_waitall(recv_req(1:4))
1766 ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
1767 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1768 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1769 )
1770 CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
1771 CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
1772 END IF
1773
1774 IF (ASSOCIATED(send_dist)) THEN
1775 IF ((send_rank == 0)) THEN
1776 CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
1777 END IF
1778 END IF
1779
1780 IF (ASSOCIATED(recv_dist)) THEN
1781 IF ((recv_rank == 0)) THEN
1782 CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
1783 END IF
1784 END IF
1785
1786 IF (global_rank == 0) THEN
1787 CALL mp_waitall(recv_req(5:6))
1788 END IF
1789
1790 ! Finally, broadcast the data to all processes in the global communicator
1791 CALL para_env%bcast(src_block, 0)
1792 CALL para_env%bcast(dest_block, 0)
1793 CALL para_env%bcast(src_num_pe, 0)
1794 CALL para_env%bcast(dest_num_pe, 0)
1795 info%src_num_pe(1:2) = src_num_pe(1:2)
1796 info%nblock_src(1:2) = src_block(1:2)
1797 IF (global_rank /= 0) THEN
1798 ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
1799 dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1) &
1800 )
1801 END IF
1802 CALL para_env%bcast(info%src_blacs2mpi, 0)
1803 CALL para_env%bcast(dest_blacs2mpi, 0)
1804
1805 recv_size = dest_num_pe(1)*dest_num_pe(2)
1806 send_size = src_num_pe(1)*src_num_pe(2)
1807 info%send_size = send_size
1808 CALL mp_waitall(send_req(:))
1809
1810 ! Setup is now complete, we can start the actual communication here.
1811 ! The order implemented here is:
1812 ! DEST_1
1813 ! compute recv sizes
1814 ! call irecv
1815 ! SRC_1
1816 ! compute send sizes
1817 ! pack send buffers
1818 ! call isend
1819 ! DEST_2
1820 ! wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
1821 ! SRC_2
1822 ! wait for the sends
1823
1824 ! DEST_1
1825 IF (ASSOCIATED(recv_dist)) THEN
1826 CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
1827 col_indices=recv_col_indices &
1828 )
1829 info%recv_col_indices => recv_col_indices
1830 info%recv_row_indices => recv_row_indices
1831 nrow_block_src = src_block(1)
1832 ncol_block_src = src_block(2)
1833 ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))
1834
1835 ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
1836 nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
1837 ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
1838 info%nlocal_recv(1) = nrow_local_recv
1839 info%nlocal_recv(2) = ncol_local_recv
1840 ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
1841 ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
1842 DO i = 1, nrow_local_recv
1843 ! For each local row we will receive, we look up its global row (in recv_row_indices),
1844 ! then work out which row block it comes from, and which process row that row block comes from.
1845 src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
1846 END DO
1847 DO j = 1, ncol_local_recv
1848 ! Similarly for the columns
1849 src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
1850 END DO
1851 ! src_p/q now contains the process row/column ID that will send data to that row/column
1852
1853 DO q = 0, src_num_pe(2) - 1
1854 ncols = count(src_q == q)
1855 DO p = 0, src_num_pe(1) - 1
1856 nrows = count(src_p == p)
1857 ! Use the send_dist here as we are looking up the processes where the data comes from
1858 recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
1859 END DO
1860 END DO
1861 DEALLOCATE (src_p, src_q)
1862
1863 ! Use one long buffer (and displacements into that buffer)
1864 ! this prevents the need for a rectangular array where not all elements will be populated
1865 ALLOCATE (info%recv_buf(sum(recv_count(:))))
1866 info%recv_disp(0) = 0
1867 DO i = 1, send_size - 1
1868 info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
1869 END DO
1870
1871 ! Issue receive calls on ranks which expect data
1872 DO k = 0, send_size - 1
1873 IF (recv_count(k) > 0) THEN
1874 CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
1875 source2global(k), info%recv_request(k))
1876 END IF
1877 END DO
1878 DEALLOCATE (source2global)
1879 END IF ! ASSOCIATED(recv_dist)
1880
1881 ! SRC_1
1882 IF (ASSOCIATED(send_dist)) THEN
1883 CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
1884 col_indices=send_col_indices &
1885 )
1886 nrow_block_dest = dest_block(1)
1887 ncol_block_dest = dest_block(2)
1888 ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))
1889
1890 ! Determine the send counts, allocate the send buffers
1891 nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
1892 ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))
1893
1894 ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
1895 ! i.e. number of rows,cols in the sending distribution
1896 ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))
1897
1898 DO i = 1, nrow_local_send
1899 ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
1900 dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1901 END DO
1902 DO j = 1, ncol_local_send
1903 dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1904 END DO
1905 ! dest_p/q now contain the process row/column ID that will receive data from that row/column
1906
1907 DO q = 0, dest_num_pe(2) - 1
1908 ncols = count(dest_q == q)
1909 DO p = 0, dest_num_pe(1) - 1
1910 nrows = count(dest_p == p)
1911 send_count(dest_blacs2mpi(p, q)) = nrows*ncols
1912 END DO
1913 END DO
1914 DEALLOCATE (dest_p, dest_q)
1915
1916 ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
1917 ALLOCATE (info%send_buf(sum(send_count(:))))
1918 send_disp(0) = 0
1919 DO k = 1, recv_size - 1
1920 send_disp(k) = send_disp(k - 1) + send_count(k - 1)
1921 END DO
1922
1923 ! Loop over the smat, pack the send buffers
1924 send_count(:) = 0
1925 DO j = 1, ncol_local_send
1926 ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
1927 dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
1928 DO i = 1, nrow_local_send
1929 dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
1930 mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
1931 send_count(mpi_rank) = send_count(mpi_rank) + 1
1932 info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
1933 END DO
1934 END DO
1935
1936 ! For each non-zero send_count, call mpi_isend
1937 DO k = 0, recv_size - 1
1938 IF (send_count(k) > 0) THEN
1939 CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
1940 dest2global(k), info%send_request(k))
1941 END IF
1942 END DO
1943 DEALLOCATE (send_count, send_disp, dest2global)
1944 END IF ! ASSOCIATED(send_dist)
1945 DEALLOCATE (dest_blacs2mpi)
1946
1947 END IF !IF (.NOT. cp2k_is_parallel)
1948
1949 CALL timestop(handle)
1950
1951 END SUBROUTINE cp_fm_start_copy_general
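! Editor's note - illustrative usage sketch, not part of the original source. The split
! start/finish/cleanup interface allows the communication posted here to overlap with other
! work; fm_src, fm_dst and para_env_all are hypothetical, info is a copy_info_type variable.
!
!   CALL cp_fm_start_copy_general(fm_src, fm_dst, para_env_all, info)
!   ! ... unrelated work while the isends/irecvs are in flight ...
!   IF (ASSOCIATED(fm_dst%matrix_struct)) CALL cp_fm_finish_copy_general(fm_dst, info)
!   IF (ASSOCIATED(fm_src%matrix_struct)) CALL cp_fm_cleanup_copy_general(info)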
1952
1953! **************************************************************************************************
1954!> \brief Completes the copy operation: wait for comms, unpack, clean up MPI state
1955!> \param destination output fm matrix
1956!> \param info all of the data that will be needed to complete the copy operation
1957! **************************************************************************************************
1958 SUBROUTINE cp_fm_finish_copy_general(destination, info)
1959 TYPE(cp_fm_type), INTENT(IN) :: destination
1960 TYPE(copy_info_type), INTENT(INOUT) :: info
1961
1962 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_finish_copy_general'
1963
1964 INTEGER :: handle, i, j, k, mpi_rank, send_size, &
1965 src_p_i, src_q_j
1966 INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count
1967 INTEGER, DIMENSION(2) :: nblock_src, nlocal_recv, src_num_pe
1968 INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices
1969
1970 CALL timeset(routinen, handle)
1971
1972 IF (.NOT. cp2k_is_parallel) THEN
1973 ! Now unpack the data from the 'send buffer'
1974 k = 0
1975 DO j = 1, SIZE(destination%local_data, 2)
1976 DO i = 1, SIZE(destination%local_data, 1)
1977 k = k + 1
1978 destination%local_data(i, j) = info%send_buf(k)
1979 END DO
1980 END DO
1981 DEALLOCATE (info%send_buf)
1982 ELSE
1983 ! Set up local variables ...
1984 send_size = info%send_size
1985 nlocal_recv(1:2) = info%nlocal_recv(:)
1986 nblock_src(1:2) = info%nblock_src(:)
1987 src_num_pe(1:2) = info%src_num_pe(:)
1988 recv_col_indices => info%recv_col_indices
1989 recv_row_indices => info%recv_row_indices
1990
1991 ! ... use the local variables to do the work
1992 ! DEST_2
1993 CALL mp_waitall(info%recv_request(:))
1994 ALLOCATE (recv_count(0:send_size - 1))
1995 ! Loop over the rmat, filling it in with data from the recv buffers
1996 ! (here the block sizes, num_pes refer to the distribution of the source matrix)
1997 recv_count(:) = 0
1998 DO j = 1, nlocal_recv(2)
1999 src_q_j = mod(((recv_col_indices(j) - 1)/nblock_src(2)), src_num_pe(2))
2000 DO i = 1, nlocal_recv(1)
2001 src_p_i = mod(((recv_row_indices(i) - 1)/nblock_src(1)), src_num_pe(1))
2002 mpi_rank = info%src_blacs2mpi(src_p_i, src_q_j)
2003 recv_count(mpi_rank) = recv_count(mpi_rank) + 1
2004 destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
2005 END DO
2006 END DO
2007 DEALLOCATE (recv_count, info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
2008 ! Invalidate the stored state
2009 NULLIFY (info%recv_col_indices, &
2010 info%recv_row_indices)
2011
2012 END IF
2013
2014 CALL timestop(handle)
2015
2016 END SUBROUTINE cp_fm_finish_copy_general
2017
2018! **************************************************************************************************
2019!> \brief Completes the copy operation: wait for comms, clean up MPI state
2020!> \param info all of the data that will be needed to complete the copy operation
2021! **************************************************************************************************
2022 SUBROUTINE cp_fm_cleanup_copy_general(info)
2023 TYPE(copy_info_type), INTENT(INOUT) :: info
2024
2025 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_cleanup_copy_general'
2026
2027 INTEGER :: handle
2028
2029 CALL timeset(routinen, handle)
2030
2031 IF (.NOT. cp2k_is_parallel) THEN
2032 ! Don't do anything - no MPI state for the serial case
2033 ELSE
2034 ! SRC_2
2035 ! If this process is also in the destination decomposition, this deallocation
2036 ! was already done in cp_fm_finish_copy_general
2037 IF (ALLOCATED(info%src_blacs2mpi)) THEN
2038 DEALLOCATE (info%src_blacs2mpi)
2039 END IF
2040 CALL mp_waitall(info%send_request)
2041 DEALLOCATE (info%send_request, info%send_buf)
2042
2043 END IF
2044
2045 CALL timestop(handle)
2046
2047 END SUBROUTINE cp_fm_cleanup_copy_general
2048
2049! **************************************************************************************************
2050!> \brief General copy of a submatrix of fm matrix to a submatrix of another fm matrix.
2051!> The two matrices can have different contexts.
2052!>
2053!> Summary of distribution routines for dense matrices
2054!> The following will copy A(iA:iA+M-1,jA:jA+N-1) to B(iB:iB+M-1,jB:jB+N-1):
2055!>
2056!> call pdgemr2d(M,N,Aloc,iA,jA,descA,Bloc,iB,jB,descB,context)
2057!>
2058!> A process that is not a part of the context of A should set descA(2)
2059!> to -1, and similarly for B.
2060!>
2061!> \param source input fm matrix
2062!> \param destination output fm matrix
2063!> \param nrows number of rows of sub matrix to be copied
2064!> \param ncols number of cols of sub matrix to be copied
2065!> \param s_firstrow starting global row index of sub matrix in source
2066!> \param s_firstcol starting global col index of sub matrix in source
2067!> \param d_firstrow starting global row index of sub matrix in destination
2068!> \param d_firstcol starting global col index of sub matrix in destination
2069!> \param global_context process grid that covers all parts of either A or B.
2070! **************************************************************************************************
2071 SUBROUTINE cp_fm_to_fm_submat_general(source, &
2072 destination, &
2073 nrows, &
2074 ncols, &
2075 s_firstrow, &
2076 s_firstcol, &
2077 d_firstrow, &
2078 d_firstcol, &
2079 global_context)
2080
2081 TYPE(cp_fm_type), INTENT(IN) :: source, destination
2082 INTEGER, INTENT(IN) :: nrows, ncols, s_firstrow, s_firstcol, &
2083 d_firstrow, d_firstcol
2084
2085 CLASS(cp_blacs_type), INTENT(IN) :: global_context
2086
2087 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_to_fm_submat_general'
2088
2089 LOGICAL :: debug
2090 INTEGER :: handle
2091#if defined(__parallel)
2092 INTEGER, DIMENSION(9) :: desca, descb
2093 REAL(kind=dp), DIMENSION(1, 1), TARGET :: dummy
2094 REAL(kind=dp), DIMENSION(:, :), POINTER :: smat, dmat
2095#endif
2096
2097 CALL timeset(routinen, handle)
2098
2099 debug = debug_this_module
2100
2101 IF (.NOT. cp2k_is_parallel) THEN
2102 CALL cp_fm_to_fm_submat(source, &
2103 destination, &
2104 nrows, &
2105 ncols, &
2106 s_firstrow, &
2107 s_firstcol, &
2108 d_firstrow, &
2109 d_firstcol)
2110 ELSE
2111#if defined(__parallel)
2112 NULLIFY (smat, dmat)
2113 ! check whether source is available on this process
2114 IF (ASSOCIATED(source%matrix_struct)) THEN
2115 desca = source%matrix_struct%descriptor
2116 IF (source%use_sp) cpabort("only DP kind implemented")
2117 IF (nrows > source%matrix_struct%nrow_global) &
2118 cpabort("nrows is greater than nrow_global of source")
2119 IF (ncols > source%matrix_struct%ncol_global) &
2120 cpabort("ncols is greater than ncol_global of source")
2121 smat => source%local_data
2122 ELSE
2123 desca = -1
2124 smat => dummy
2125 END IF
2126 ! check destination is available on this process
2127 IF (ASSOCIATED(destination%matrix_struct)) THEN
2128 descb = destination%matrix_struct%descriptor
2129 IF (destination%use_sp) cpabort("only DP kind implemented")
2130 IF (nrows > destination%matrix_struct%nrow_global) &
2131 cpabort("nrows is greater than nrow_global of destination")
2132 IF (ncols > destination%matrix_struct%ncol_global) &
2133 cpabort("ncols is greater than ncol_global of destination")
2134 dmat => destination%local_data
2135 ELSE
2136 descb = -1
2137 dmat => dummy
2138 END IF
2139 ! do copy
2140
2141 CALL pdgemr2d(nrows, &
2142 ncols, &
2143 smat, &
2144 s_firstrow, &
2145 s_firstcol, &
2146 desca, &
2147 dmat, &
2148 d_firstrow, &
2149 d_firstcol, &
2150 descb, &
2151 global_context%get_handle())
2152#else
2153 mark_used(global_context)
2154 cpabort("this subroutine only supports SCALAPACK")
2155#endif
2156 END IF
2157
2158 CALL timestop(handle)
2159
2160 END SUBROUTINE cp_fm_to_fm_submat_general
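! Editor's note - illustrative usage sketch, not part of the original source. Copies a 100x50
! sub-block between two matrices that may live on different BLACS contexts; blacs_env_all is a
! hypothetical context covering all processes that hold part of either matrix.
!
!   CALL cp_fm_to_fm_submat_general(fm_src, fm_dst, nrows=100, ncols=50, &
!                                   s_firstrow=1, s_firstcol=1, d_firstrow=1, d_firstcol=1, &
!                                   global_context=blacs_env_all)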
2161
2162! **************************************************************************************************
2163!> \brief ...
2164!> \param matrix ...
2165!> \param irow_global ...
2166!> \param icol_global ...
2167!> \param alpha ...
2168! **************************************************************************************************
2169 SUBROUTINE cp_fm_add_to_element(matrix, irow_global, icol_global, alpha)
2170
2171 ! Add alpha to the matrix element specified by the global indices
2172 ! irow_global and icol_global
2173
2174 ! - Creation (05.05.06,MK)
2175
2176 TYPE(cp_fm_type), INTENT(IN) :: matrix
2177 INTEGER, INTENT(IN) :: irow_global, icol_global
2178 REAL(kind=dp), INTENT(IN) :: alpha
2179
2180 INTEGER :: mypcol, myprow, npcol, nprow
2181 REAL(kind=dp), DIMENSION(:, :), POINTER :: a
2182 TYPE(cp_blacs_env_type), POINTER :: context
2183#if defined(__parallel)
2184 INTEGER :: icol_local, ipcol, iprow, &
2185 irow_local
2186 INTEGER, DIMENSION(9) :: desca
2187#endif
2188
2189 context => matrix%matrix_struct%context
2190
2191 myprow = context%mepos(1)
2192 mypcol = context%mepos(2)
2193
2194 nprow = context%num_pe(1)
2195 npcol = context%num_pe(2)
2196
2197 a => matrix%local_data
2198
2199#if defined(__parallel)
2200
2201 desca(:) = matrix%matrix_struct%descriptor(:)
2202
2203 CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
2204 irow_local, icol_local, iprow, ipcol)
2205
2206 IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
2207 a(irow_local, icol_local) = a(irow_local, icol_local) + alpha
2208 END IF
2209
2210#else
2211
2212 a(irow_global, icol_global) = a(irow_global, icol_global) + alpha
2213
2214#endif
2215
2216 END SUBROUTINE cp_fm_add_to_element
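! Editor's note - illustrative usage sketch, not part of the original source. Adds 0.5 to the
! global element (i, j) of a hypothetical matrix fm_a; only the process that owns the element
! updates its local data.
!
!   CALL cp_fm_add_to_element(fm_a, irow_global=i, icol_global=j, alpha=0.5_dp)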
2217
2218! **************************************************************************************************
2219!> \brief ...
2220!> \param fm ...
2221!> \param unit ...
2222! **************************************************************************************************
2223 SUBROUTINE cp_fm_write_unformatted(fm, unit)
2224 TYPE(cp_fm_type), INTENT(IN) :: fm
2225 INTEGER, INTENT(IN) :: unit
2226
2227 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_write_unformatted'
2228
2229 INTEGER :: handle, j, max_block, &
2230 ncol_global, nrow_global
2231 TYPE(mp_para_env_type), POINTER :: para_env
2232#if defined(__parallel)
2233 INTEGER :: i, i_block, icol_local, &
2234 in, info, ipcol, &
2235 iprow, irow_local, &
2236 mepos, &
2237 num_pe, rb, tag
2238 INTEGER, DIMENSION(9) :: desc
2239 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2240 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2241 TYPE(cp_blacs_type) :: ictxt_loc
2242 INTEGER, EXTERNAL :: numroc
2243#endif
2244
2245 CALL timeset(routinen, handle)
2246 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2247 para_env=para_env)
2248
2249#if defined(__parallel)
2250 num_pe = para_env%num_pe
2251 mepos = para_env%mepos
2252 rb = nrow_global
2253 tag = 0
2254 ! get a new context
2255 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2256 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2257 cpassert(info == 0)
2258 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2259 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2260 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2261
2262 ALLOCATE (newdat(nrow_global, max(1, in)))
2263
2264 ! do the actual scalapack to cols reordering
2265 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2266 fm%matrix_struct%descriptor, &
2267 newdat, 1, 1, desc, ictxt_loc%get_handle())
2268
2269 ALLOCATE (vecbuf(nrow_global*max_block))
2270 vecbuf = huge(1.0_dp) ! init for valgrind
2271
2272 DO i = 1, ncol_global, max(max_block, 1)
2273 i_block = min(max_block, ncol_global - i + 1)
2274 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2275 irow_local, icol_local, iprow, ipcol)
2276 IF (ipcol == mypcol) THEN
2277 DO j = 1, i_block
2278 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2279 END DO
2280 END IF
2281
2282 IF (ipcol == 0) THEN
2283 ! do nothing
2284 ELSE
2285 IF (ipcol == mypcol) THEN
2286 CALL para_env%send(vecbuf(:), 0, tag)
2287 END IF
2288 IF (mypcol == 0) THEN
2289 CALL para_env%recv(vecbuf(:), ipcol, tag)
2290 END IF
2291 END IF
2292
2293 IF (unit > 0) THEN
2294 DO j = 1, i_block
2295 WRITE (unit) vecbuf((j - 1)*nrow_global + 1:nrow_global*j)
2296 END DO
2297 END IF
2298
2299 END DO
2300 END associate
2301 DEALLOCATE (vecbuf)
2302
2303 CALL ictxt_loc%gridexit()
2304
2305 DEALLOCATE (newdat)
2306
2307#else
2308
2309 IF (unit > 0) THEN
2310 DO j = 1, ncol_global
2311 WRITE (unit) fm%local_data(:, j)
2312 END DO
2313 END IF
2314
2315#endif
2316 CALL timestop(handle)
2317
2318 END SUBROUTINE cp_fm_write_unformatted
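! Editor's note - illustrative usage sketch, not part of the original source. Writes a matrix
! to an already opened unformatted unit; iw is a hypothetical unit number that should be > 0
! only on the rank that actually writes.
!
!   CALL cp_fm_write_unformatted(fm_a, iw)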
2319
2320! **************************************************************************************************
2321!> \brief Write out a full matrix in plain text.
2322!> \param fm the matrix to be written
2323!> \param unit the unit number for I/O
2324!> \param header optional header
2325!> \param value_format ...
2326! **************************************************************************************************
2327 SUBROUTINE cp_fm_write_formatted(fm, unit, header, value_format)
2328 TYPE(cp_fm_type), INTENT(IN) :: fm
2329 INTEGER, INTENT(IN) :: unit
2330 CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: header, value_format
2331
2332 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_write_formatted'
2333
2334 CHARACTER(LEN=21) :: my_value_format
2335 INTEGER :: handle, i, j, max_block, &
2336 ncol_global, nrow_global
2337 TYPE(mp_para_env_type), POINTER :: para_env
2338#if defined(__parallel)
2339 INTEGER :: i_block, icol_local, &
2340 in, info, ipcol, &
2341 iprow, irow_local, &
2342 mepos, num_pe, rb, tag, k, &
2343 icol, irow
2344 INTEGER, DIMENSION(9) :: desc
2345 REAL(kind=dp), DIMENSION(:), POINTER :: vecbuf
2346 REAL(kind=dp), DIMENSION(:, :), POINTER :: newdat
2347 TYPE(cp_blacs_type) :: ictxt_loc
2348 INTEGER, EXTERNAL :: numroc
2349#endif
2350
2351 CALL timeset(routinen, handle)
2352 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2353 para_env=para_env)
2354
2355 IF (PRESENT(value_format)) THEN
2356 cpassert(len_trim(adjustl(value_format)) < 11)
2357 my_value_format = "(I10, I10, "//trim(adjustl(value_format))//")"
2358 ELSE
2359 my_value_format = "(I10, I10, ES24.12)"
2360 END IF
2361
2362 IF (unit > 0) THEN
2363 IF (PRESENT(header)) WRITE (unit, *) header
2364 WRITE (unit, "(A2, A8, A10, A24)") "#", "Row", "Column", adjustl("Value")
2365 END IF
2366
2367#if defined(__parallel)
2368 num_pe = para_env%num_pe
2369 mepos = para_env%mepos
2370 rb = nrow_global
2371 tag = 0
2372 ! get a new context
2373 CALL ictxt_loc%gridinit(para_env, 'R', 1, num_pe)
2374 CALL descinit(desc, nrow_global, ncol_global, rb, max_block, 0, 0, ictxt_loc%get_handle(), nrow_global, info)
2375 cpassert(info == 0)
2376 associate(nprow => ictxt_loc%num_pe(1), npcol => ictxt_loc%num_pe(2), &
2377 myprow => ictxt_loc%mepos(1), mypcol => ictxt_loc%mepos(2))
2378 in = numroc(ncol_global, max_block, mypcol, 0, npcol)
2379
2380 ALLOCATE (newdat(nrow_global, max(1, in)))
2381
2382 ! do the actual scalapack to cols reordering
2383 CALL pdgemr2d(nrow_global, ncol_global, fm%local_data, 1, 1, &
2384 fm%matrix_struct%descriptor, &
2385 newdat, 1, 1, desc, ictxt_loc%get_handle())
2386
2387 ALLOCATE (vecbuf(nrow_global*max_block))
2388 vecbuf = huge(1.0_dp) ! init for valgrind
2389 irow = 1
2390 icol = 1
2391
2392 DO i = 1, ncol_global, max(max_block, 1)
2393 i_block = min(max_block, ncol_global - i + 1)
2394 CALL infog2l(1, i, desc, nprow, npcol, myprow, mypcol, &
2395 irow_local, icol_local, iprow, ipcol)
2396 IF (ipcol == mypcol) THEN
2397 DO j = 1, i_block
2398 vecbuf((j - 1)*nrow_global + 1:nrow_global*j) = newdat(:, icol_local + j - 1)
2399 END DO
2400 END IF
2401
2402 IF (ipcol == 0) THEN
2403 ! do nothing
2404 ELSE
2405 IF (ipcol == mypcol) THEN
2406 CALL para_env%send(vecbuf(:), 0, tag)
2407 END IF
2408 IF (mypcol == 0) THEN
2409 CALL para_env%recv(vecbuf(:), ipcol, tag)
2410 END IF
2411 END IF
2412
2413 IF (unit > 0) THEN
2414 DO j = 1, i_block
2415 DO k = (j - 1)*nrow_global + 1, nrow_global*j
2416 WRITE (unit=unit, fmt=my_value_format) irow, icol, vecbuf(k)
2417 irow = irow + 1
2418 IF (irow > nrow_global) THEN
2419 irow = 1
2420 icol = icol + 1
2421 END IF
2422 END DO
2423 END DO
2424 END IF
2425
2426 END DO
2427 END associate
2428 DEALLOCATE (vecbuf)
2429
2430 CALL ictxt_loc%gridexit()
2431
2432 DEALLOCATE (newdat)
2433
2434#else
2435
2436 IF (unit > 0) THEN
2437 DO j = 1, ncol_global
2438 DO i = 1, nrow_global
2439 WRITE (unit=unit, fmt=my_value_format) i, j, fm%local_data(i, j)
2440 END DO
2441 END DO
2442 END IF
2443
2444#endif
2445 CALL timestop(handle)
2446
2447 END SUBROUTINE cp_fm_write_formatted
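! Editor's note - illustrative usage sketch, not part of the original source. Writes a matrix
! as "row column value" triples with a custom value format; the header string and matrix name
! are hypothetical.
!
!   CALL cp_fm_write_formatted(fm_a, iw, header="overlap matrix", value_format="ES12.5")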
2448
2449! **************************************************************************************************
2450!> \brief ...
2451!> \param fm ...
2452!> \param unit ...
2453! **************************************************************************************************
2454 SUBROUTINE cp_fm_read_unformatted(fm, unit)
2455 TYPE(cp_fm_type), INTENT(INOUT) :: fm
2456 INTEGER, INTENT(IN) :: unit
2457
2458 CHARACTER(LEN=*), PARAMETER :: routinen = 'cp_fm_read_unformatted'
2459
2460 INTEGER :: handle, j, max_block, &
2461 ncol_global, nrow_global
2462 TYPE(mp_para_env_type), POINTER :: para_env
2463#if defined(__parallel)
2464 INTEGER :: k, n_cols
2465 REAL(kind=dp), DIMENSION(:, :), POINTER :: vecbuf
2466#endif
2467
2468 CALL timeset(routinen, handle)
2469
2470 CALL cp_fm_get_info(fm, nrow_global=nrow_global, ncol_global=ncol_global, ncol_block=max_block, &
2471 para_env=para_env)
2472
2473#if defined(__parallel)
2474
2475 ! the parallel case could be made more efficient (see cp_fm_write_unformatted)
2476
2477 ALLOCATE (vecbuf(nrow_global, max_block))
2478
2479 DO j = 1, ncol_global, max_block
2480
2481 n_cols = min(max_block, ncol_global - j + 1)
2482 IF (para_env%mepos == 0) THEN
2483 DO k = 1, n_cols
2484 READ (unit) vecbuf(:, k)
2485 END DO
2486 END IF
2487 CALL para_env%bcast(vecbuf, 0)
2488 CALL cp_fm_set_submatrix(fm, vecbuf, start_row=1, start_col=j, n_cols=n_cols)
2489
2490 END DO
2491
2492 DEALLOCATE (vecbuf)
2493
2494#else
2495
2496 DO j = 1, ncol_global
2497 READ (unit) fm%local_data(:, j)
2498 END DO
2499
2500#endif
2501
2502 CALL timestop(handle)
2503
2504 END SUBROUTINE cp_fm_read_unformatted
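! Editor's note - illustrative usage sketch, not part of the original source. Reads a matrix
! back from a unit written by cp_fm_write_unformatted; the unit iu only needs to be readable
! on rank 0, the data is broadcast to the other ranks.
!
!   CALL cp_fm_read_unformatted(fm_a, iu)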
2505
2506! **************************************************************************************************
2507!> \brief ...
2508!> \param mm_type ...
2509! **************************************************************************************************
2510 SUBROUTINE cp_fm_setup(mm_type)
2511 INTEGER, INTENT(IN) :: mm_type
2512
2513 cp_fm_mm_type = mm_type
2514 END SUBROUTINE cp_fm_setup
2515
2516! **************************************************************************************************
2517!> \brief ...
2518!> \return ...
2519! **************************************************************************************************
2520 FUNCTION cp_fm_get_mm_type() RESULT(res)
2521 INTEGER :: res
2522
2523 res = cp_fm_mm_type
2524 END FUNCTION cp_fm_get_mm_type
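! Editor's note - illustrative usage sketch, not part of the original source. Stores a
! matrix-multiplication backend flag and queries it later; the value 1 is hypothetical.
!
!   CALL cp_fm_setup(mm_type=1)
!   my_mm_type = cp_fm_get_mm_type()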
2525
2526! **************************************************************************************************
2527!> \brief ...
2528!> \param ictxt ...
2529!> \param prec ...
2530!> \return ...
2531! **************************************************************************************************
2532 FUNCTION cp_fm_pilaenv(ictxt, prec) RESULT(res)
2533 INTEGER :: ictxt
2534 CHARACTER(LEN=1) :: prec
2535 INTEGER :: res
2536#if defined(__parallel)
2537 INTEGER :: pilaenv
2538 res = pilaenv(ictxt, prec)
2539#else
2540 mark_used(ictxt)
2541 mark_used(prec)
2542 res = -1
2543#endif
2544
2545 END FUNCTION cp_fm_pilaenv
2546
2547END MODULE cp_fm_types