cp_cfm_types.F (git:374b731)
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Represents a complex full matrix distributed on many processors.
!> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
! **************************************************************************************************
MODULE cp_cfm_types
   USE cp_blacs_env, ONLY: cp_blacs_env_type
   USE cp_fm_struct, ONLY: cp_fm_struct_equivalent, &
                           cp_fm_struct_get, &
                           cp_fm_struct_release, &
                           cp_fm_struct_retain, &
                           cp_fm_struct_type
   USE cp_fm_types, ONLY: cp_fm_type
   USE kinds, ONLY: dp
   USE mathconstants, ONLY: z_one, &
                            z_zero
   USE message_passing, ONLY: cp2k_is_parallel, &
                              mp_any_source, &
                              mp_para_env_type, &
                              mp_proc_null, &
                              mp_request_null, &
                              mp_request_type, &
                              mp_waitall
#include "../base/base_uses.f90"

   IMPLICIT NONE
   PRIVATE

   LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
   INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11

   PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type, &
             cp_cfm_create, cp_cfm_release, cp_cfm_get_info, &
             cp_cfm_get_element, cp_cfm_set_element, cp_cfm_set_all, &
             cp_cfm_get_submatrix, cp_cfm_set_submatrix, &
             cp_cfm_to_cfm, cp_cfm_to_cfm_triangular, cp_cfm_to_fm, cp_fm_to_cfm, &
             cp_cfm_start_copy_general, cp_cfm_finish_copy_general, &
             cp_cfm_cleanup_copy_general

   INTERFACE cp_cfm_to_cfm
      MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
                       cp_cfm_to_cfm_columns ! just a number of columns
   END INTERFACE

! **************************************************************************************************
!> \brief Represent a complex full matrix.
!> \param name the name of the matrix, used for printing
!> \param matrix_struct structure of this matrix
!> \param local_data array with the data of the matrix (its content depends on
!>        the matrix type used: in a parallel run it will be in
!>        ScaLAPACK format, in a sequential run it will simply contain the matrix)
! **************************************************************************************************
   TYPE cp_cfm_type
      CHARACTER(len=60) :: name = ""
      TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
      COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
   END TYPE cp_cfm_type

! **************************************************************************************************
!> \brief Just to build arrays of pointers to matrices.
!> \param matrix the pointer to the matrix
! **************************************************************************************************
   TYPE cp_cfm_p_type
      TYPE(cp_cfm_type), POINTER :: matrix => null()
   END TYPE cp_cfm_p_type
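
! A minimal usage sketch for cp_cfm_p_type: building an array of pointers to
! matrices (the names 'matrices', 'fm_struct' and 'n' are illustrative only):
!
!    TYPE(cp_cfm_p_type), DIMENSION(:), ALLOCATABLE :: matrices
!    ALLOCATE (matrices(n))
!    DO i = 1, n
!       ALLOCATE (matrices(i)%matrix)
!       CALL cp_cfm_create(matrices(i)%matrix, fm_struct)
!    END DO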

! **************************************************************************************************
!> \brief Stores the state of a copy between cp_cfm_start_copy_general
!>        and cp_cfm_finish_copy_general.
!> \par History
!>      Jan 2017 derived type 'copy_info_type' has been created [Mark T]
!>      Jan 2018 the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
! **************************************************************************************************
   TYPE copy_cfm_info_type
      !> number of MPI processes that send data
      INTEGER :: send_size = -1
      !> number of locally stored rows (1) and columns (2) of the destination matrix
      INTEGER, DIMENSION(2) :: nlocal_recv = -1
      !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
      INTEGER, DIMENSION(2) :: nblock_src = -1
      !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
      INTEGER, DIMENSION(2) :: src_num_pe = -1
      !> displacements into recv_buf
      INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disp
      !> MPI requests for non-blocking receive and send operations
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_request, send_request
      !> global column and row indices of locally stored elements of the destination matrix
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
      !> rank of the MPI process with BLACS coordinates (prow, pcol)
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: src_blacs2mpi
      !> receiving and sending buffers for non-blocking MPI communication
      COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
   END TYPE copy_cfm_info_type

CONTAINS

! **************************************************************************************************
!> \brief Creates a new full matrix with the given structure.
!> \param matrix matrix to be created
!> \param matrix_struct structure of the matrix
!> \param name name of the matrix
!> \note
!>      preferred allocation routine
! **************************************************************************************************
   SUBROUTINE cp_cfm_create(matrix, matrix_struct, name)
      TYPE(cp_cfm_type), INTENT(OUT) :: matrix
      TYPE(cp_fm_struct_type), TARGET, INTENT(IN) :: matrix_struct
      CHARACTER(len=*), INTENT(in), OPTIONAL :: name

      INTEGER :: ncol_local, npcol, nprow, nrow_local
      TYPE(cp_blacs_env_type), POINTER :: context

#if defined(__parallel) && ! defined(__SCALAPACK)
      cpabort("full matrices need ScaLAPACK for parallel runs")
#endif

      context => matrix_struct%context
      matrix%matrix_struct => matrix_struct
      CALL cp_fm_struct_retain(matrix%matrix_struct)

      nprow = context%num_pe(1)
      npcol = context%num_pe(2)
      NULLIFY (matrix%local_data)

      nrow_local = matrix_struct%local_leading_dimension
      ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
      ALLOCATE (matrix%local_data(nrow_local, ncol_local))

      ! do not initialise the created matrix: it is up to the user to do so
      !CALL zcopy(nrow_local*ncol_local, z_zero, 0, matrix%local_data, 1)

      IF (PRESENT(name)) THEN
         matrix%name = name
      ELSE
         matrix%name = 'full complex matrix'
      END IF
   END SUBROUTINE cp_cfm_create
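
! A typical lifecycle, as a sketch (assumes a valid TYPE(cp_fm_struct_type)
! 'fm_struct' created elsewhere); cp_cfm_create leaves the matrix
! uninitialised, so it is zeroed explicitly here:
!
!    TYPE(cp_cfm_type) :: work
!    CALL cp_cfm_create(work, fm_struct, name="work matrix")
!    CALL cp_cfm_set_all(work, z_zero)
!    ! ... use the matrix ...
!    CALL cp_cfm_release(work)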

! **************************************************************************************************
!> \brief Releases a full matrix.
!> \param matrix the matrix to release
! **************************************************************************************************
   SUBROUTINE cp_cfm_release(matrix)
      TYPE(cp_cfm_type), INTENT(INOUT) :: matrix

      IF (ASSOCIATED(matrix%local_data)) THEN
         DEALLOCATE (matrix%local_data)
      END IF
      matrix%name = ""
      CALL cp_fm_struct_release(matrix%matrix_struct)
   END SUBROUTINE cp_cfm_release

! **************************************************************************************************
!> \brief Set all elements of the full matrix to alpha. In addition, set all
!>        diagonal matrix elements to beta (if given explicitly).
!> \param matrix matrix to initialise
!> \param alpha value of the off-diagonal matrix elements
!> \param beta value of the diagonal matrix elements (equal to alpha if absent)
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), INTENT(in) :: alpha
      COMPLEX(kind=dp), INTENT(in), OPTIONAL :: beta

      INTEGER :: irow_local, nrow_local
#if defined(__SCALAPACK)
      INTEGER :: icol_local, ncol_local
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
#endif

      CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)

      IF (PRESENT(beta)) THEN
#if defined(__SCALAPACK)
         CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
                              row_indices=row_indices, col_indices=col_indices)

         icol_local = 1
         irow_local = 1

         DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
            IF (row_indices(irow_local) < col_indices(icol_local)) THEN
               irow_local = irow_local + 1
            ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
               icol_local = icol_local + 1
            ELSE
               matrix%local_data(irow_local, icol_local) = beta
               irow_local = irow_local + 1
               icol_local = icol_local + 1
            END IF
         END DO
#else
         nrow_local = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)

         DO irow_local = 1, nrow_local
            matrix%local_data(irow_local, irow_local) = beta
         END DO
#endif
      END IF

   END SUBROUTINE cp_cfm_set_all
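
! For example, a unit matrix can be built with a single call (sketch):
!
!    CALL cp_cfm_set_all(matrix, z_zero, z_one) ! off-diagonal = 0, diagonal = 1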

! **************************************************************************************************
!> \brief Get the matrix element by its global index.
!> \param matrix full matrix
!> \param irow_global global row index
!> \param icol_global global column index
!> \param alpha matrix element
!> \par History
!>      TCH, created
!>      always return the answer
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      INTEGER, INTENT(in) :: irow_global, icol_global
      COMPLEX(kind=dp), INTENT(out) :: alpha

#if defined(__SCALAPACK)
      INTEGER :: icol_local, ipcol, iprow, irow_local, &
                 mypcol, myprow, npcol, nprow
      INTEGER, DIMENSION(9) :: desca
      TYPE(cp_blacs_env_type), POINTER :: context
#endif

#if defined(__SCALAPACK)
      context => matrix%matrix_struct%context
      myprow = context%mepos(1)
      mypcol = context%mepos(2)
      nprow = context%num_pe(1)
      npcol = context%num_pe(2)

      desca(:) = matrix%matrix_struct%descriptor(:)

      CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
                   irow_local, icol_local, iprow, ipcol)

      IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
         alpha = matrix%local_data(irow_local, icol_local)
         CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
      ELSE
         CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
      END IF

#else
      alpha = matrix%local_data(irow_global, icol_global)
#endif

   END SUBROUTINE cp_cfm_get_element

! **************************************************************************************************
!> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
!> \param matrix full matrix
!> \param irow_global global row index
!> \param icol_global global column index
!> \param alpha value of the matrix element
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      INTEGER, INTENT(in) :: irow_global, icol_global
      COMPLEX(kind=dp), INTENT(in) :: alpha

#if defined(__SCALAPACK)
      INTEGER :: icol_local, ipcol, iprow, irow_local, &
                 mypcol, myprow, npcol, nprow
      INTEGER, DIMENSION(9) :: desca
      TYPE(cp_blacs_env_type), POINTER :: context
#endif

#if defined(__SCALAPACK)
      context => matrix%matrix_struct%context
      myprow = context%mepos(1)
      mypcol = context%mepos(2)
      nprow = context%num_pe(1)
      npcol = context%num_pe(2)

      desca(:) = matrix%matrix_struct%descriptor(:)

      CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
                   irow_local, icol_local, iprow, ipcol)

      IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
         matrix%local_data(irow_local, icol_local) = alpha
      END IF

#else
      matrix%local_data(irow_global, icol_global) = alpha
#endif

   END SUBROUTINE cp_cfm_set_element
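
! Sketch: writing and reading back a single global element. cp_cfm_set_element
! may be called with the same arguments on all ranks (only the owning process
! stores the value), while cp_cfm_get_element broadcasts the result, so it must
! be called by all processes of the matrix' BLACS context:
!
!    CALL cp_cfm_set_element(matrix, 1, 1, z_one)
!    CALL cp_cfm_get_element(matrix, 1, 1, alpha) ! alpha == z_one on all ranks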

! **************************************************************************************************
!> \brief Extract a sub-matrix from the full matrix:
!>        op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows-1,start_col:start_col+n_cols-1).
!>        The sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
!> \param fm full matrix you want to get the elements from
!> \param target_m 2-D array to store the extracted sub-matrix
!> \param start_row global row index of the matrix element target_m(1,1) (defaults to 1)
!> \param start_col global column index of the matrix element target_m(1,1) (defaults to 1)
!> \param n_rows number of rows to extract (defaults to size(op(target_m),1))
!> \param n_cols number of columns to extract (defaults to size(op(target_m),2))
!> \param transpose indicates that the extracted sub-matrix target_m should be transposed:
!>        op(target_m) = target_m^T if .TRUE.,
!>        op(target_m) = target_m if .FALSE. (defaults to .FALSE.)
!> \par History
!>      * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
!>      * 01.2018 drop innermost conditional branching [Sergey Chulkov]
!> \author Lianheng Tong
!> \note
!>      Optimized for full column updates. The matrix target_m is replicated and valid on all CPUs.
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
      TYPE(cp_cfm_type), INTENT(IN) :: fm
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
      INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
      LOGICAL, INTENT(in), OPTIONAL :: transpose

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_get_submatrix'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
      INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
                 ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
                 start_row_global, start_row_local, this_col
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: do_zero, tr_a
      TYPE(mp_para_env_type), POINTER :: para_env

      CALL timeset(routinen, handle)

      IF (SIZE(target_m) /= 0) THEN
#if defined(__SCALAPACK)
         do_zero = .true.
#else
         do_zero = .false.
#endif

         tr_a = .false.
         IF (PRESENT(transpose)) tr_a = transpose

         ! find out the first and last global row/column indices
         start_row_global = 1
         start_col_global = 1
         IF (PRESENT(start_row)) start_row_global = start_row
         IF (PRESENT(start_col)) start_col_global = start_col

         IF (tr_a) THEN
            end_row_global = SIZE(target_m, 2)
            end_col_global = SIZE(target_m, 1)
         ELSE
            end_row_global = SIZE(target_m, 1)
            end_col_global = SIZE(target_m, 2)
         END IF
         IF (PRESENT(n_rows)) end_row_global = n_rows
         IF (PRESENT(n_cols)) end_col_global = n_cols

         end_row_global = end_row_global + start_row_global - 1
         end_col_global = end_col_global + start_col_global - 1

         CALL cp_cfm_get_info(matrix=fm, &
                              nrow_global=nrow_global, ncol_global=ncol_global, &
                              nrow_local=nrow_local, ncol_local=ncol_local, &
                              row_indices=row_indices, col_indices=col_indices)
         IF (end_row_global > nrow_global) THEN
            end_row_global = nrow_global
            do_zero = .true.
         END IF
         IF (end_col_global > ncol_global) THEN
            end_col_global = ncol_global
            do_zero = .true.
         END IF

         ! find out row/column indices of locally stored matrix elements that need to be copied.
         ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
         DO start_row_local = 1, nrow_local
            IF (row_indices(start_row_local) >= start_row_global) EXIT
         END DO

         DO end_row_local = start_row_local, nrow_local
            IF (row_indices(end_row_local) > end_row_global) EXIT
         END DO
         end_row_local = end_row_local - 1

         DO start_col_local = 1, ncol_local
            IF (col_indices(start_col_local) >= start_col_global) EXIT
         END DO

         DO end_col_local = start_col_local, ncol_local
            IF (col_indices(end_col_local) > end_col_global) EXIT
         END DO
         end_col_local = end_col_local - 1

         para_env => fm%matrix_struct%para_env
         local_data => fm%local_data

         ! wipe the content of the target matrix if:
         ! * the source matrix is distributed across a number of processes, or
         ! * not all elements of the target matrix will be assigned, e.g.
         !   when the target matrix is larger than the source matrix
         IF (do_zero) &
            CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)

         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
               END DO
            END DO
         END IF

         CALL para_env%sum(target_m)
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_get_submatrix
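
! Sketch: gathering the leading 4x4 block of a distributed matrix into a local
! array that is replicated (and valid) on every process afterwards:
!
!    COMPLEX(kind=dp), DIMENSION(4, 4) :: block
!    CALL cp_cfm_get_submatrix(fm, block, start_row=1, start_col=1, &
!                              n_rows=4, n_cols=4)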

! **************************************************************************************************
!> \brief Set a sub-matrix of the full matrix:
!>        matrix(start_row:start_row+n_rows-1,start_col:start_col+n_cols-1)
!>        = alpha*op(new_values)(1:n_rows,1:n_cols) +
!>          beta*matrix(start_row:start_row+n_rows-1,start_col:start_col+n_cols-1)
!> \param matrix full matrix to update
!> \param new_values replicated 2-D array that holds new elements of the updated sub-matrix
!> \param start_row global row index of the matrix element new_values(1,1) (defaults to 1)
!> \param start_col global column index of the matrix element new_values(1,1) (defaults to 1)
!> \param n_rows number of rows to update (defaults to size(op(new_values),1))
!> \param n_cols number of columns to update (defaults to size(op(new_values),2))
!> \param alpha scale factor for the new values (defaults to (1.0,0.0))
!> \param beta scale factor for the old values (defaults to (0.0,0.0))
!> \param transpose indicates that the matrix new_values should be transposed:
!>        op(new_values) = new_values^T if .TRUE.,
!>        op(new_values) = new_values if .FALSE. (defaults to .FALSE.)
!> \par History
!>      * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
!>      * 01.2018 drop innermost conditional branching [Sergey Chulkov]
!> \author Lianheng Tong
!> \note
!>      Optimized for alpha=(1.0,0.0), beta=(0.0,0.0).
!>      All matrix elements of 'new_values' need to be valid on all CPUs.
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
                                   start_col, n_rows, n_cols, alpha, beta, transpose)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
      INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
      COMPLEX(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
      LOGICAL, INTENT(in), OPTIONAL :: transpose

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_set_submatrix'

      COMPLEX(kind=dp) :: al, be
      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
      INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
                 ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
                 start_row_global, start_row_local, this_col
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: tr_a

      CALL timeset(routinen, handle)

      al = z_one
      be = z_zero
      IF (PRESENT(alpha)) al = alpha
      IF (PRESENT(beta)) be = beta

      ! find out the first and last global row/column indices
      start_row_global = 1
      start_col_global = 1
      IF (PRESENT(start_row)) start_row_global = start_row
      IF (PRESENT(start_col)) start_col_global = start_col

      tr_a = .false.
      IF (PRESENT(transpose)) tr_a = transpose

      IF (tr_a) THEN
         end_row_global = SIZE(new_values, 2)
         end_col_global = SIZE(new_values, 1)
      ELSE
         end_row_global = SIZE(new_values, 1)
         end_col_global = SIZE(new_values, 2)
      END IF
      IF (PRESENT(n_rows)) end_row_global = n_rows
      IF (PRESENT(n_cols)) end_col_global = n_cols

      end_row_global = end_row_global + start_row_global - 1
      end_col_global = end_col_global + start_col_global - 1

      CALL cp_cfm_get_info(matrix=matrix, &
                           nrow_global=nrow_global, ncol_global=ncol_global, &
                           nrow_local=nrow_local, ncol_local=ncol_local, &
                           row_indices=row_indices, col_indices=col_indices)
      IF (end_row_global > nrow_global) end_row_global = nrow_global
      IF (end_col_global > ncol_global) end_col_global = ncol_global

      ! find out row/column indices of locally stored matrix elements that need to be set.
      ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
      DO start_row_local = 1, nrow_local
         IF (row_indices(start_row_local) >= start_row_global) EXIT
      END DO

      DO end_row_local = start_row_local, nrow_local
         IF (row_indices(end_row_local) > end_row_global) EXIT
      END DO
      end_row_local = end_row_local - 1

      DO start_col_local = 1, ncol_local
         IF (col_indices(start_col_local) >= start_col_global) EXIT
      END DO

      DO end_col_local = start_col_local, ncol_local
         IF (col_indices(end_col_local) > end_col_global) EXIT
      END DO
      end_col_local = end_col_local - 1

      local_data => matrix%local_data

      IF (al == z_one .AND. be == z_zero) THEN
         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
               END DO
            END DO
         END IF
      ELSE
         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
                                     be*local_data(i, j)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
                                     be*local_data(i, j)
               END DO
            END DO
         END IF
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_set_submatrix
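
! Sketch: accumulating a replicated 4x4 block onto the top-left corner of the
! distributed matrix, i.e. matrix(1:4,1:4) = block + matrix(1:4,1:4):
!
!    CALL cp_cfm_set_submatrix(matrix, block, start_row=1, start_col=1, &
!                              alpha=z_one, beta=z_one)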

! **************************************************************************************************
!> \brief Returns information about a full matrix.
!> \param matrix matrix
!> \param name name of the matrix
!> \param nrow_global total number of rows
!> \param ncol_global total number of columns
!> \param nrow_block number of rows per ScaLAPACK block
!> \param ncol_block number of columns per ScaLAPACK block
!> \param nrow_local number of locally stored rows
!> \param ncol_local number of locally stored columns
!> \param row_indices global indices of locally stored rows
!> \param col_indices global indices of locally stored columns
!> \param local_data locally stored matrix elements
!> \param context BLACS context
!> \param matrix_struct matrix structure
!> \param para_env parallel environment
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
                              nrow_block, ncol_block, nrow_local, ncol_local, &
                              row_indices, col_indices, local_data, context, &
                              matrix_struct, para_env)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      CHARACTER(len=*), INTENT(OUT), OPTIONAL :: name
      INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
                                        ncol_block, nrow_local, ncol_local
      INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
      COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         OPTIONAL, POINTER :: local_data
      TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
      TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
      TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env

      IF (PRESENT(name)) name = matrix%name
      IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
      IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(

      CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
                            ncol_local=ncol_local, nrow_global=nrow_global, &
                            ncol_global=ncol_global, nrow_block=nrow_block, &
                            ncol_block=ncol_block, context=context, &
                            row_indices=row_indices, col_indices=col_indices, para_env=para_env)

   END SUBROUTINE cp_cfm_get_info
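
! Sketch: a common idiom for loops over the locally stored elements, using the
! local sizes and the local-to-global index maps returned by this routine:
!
!    CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
!                         row_indices=row_indices, col_indices=col_indices)
!    DO jcol = 1, ncol_local ! col_indices(jcol) is the global column index
!       DO irow = 1, nrow_local ! row_indices(irow) is the global row index
!          matrix%local_data(irow, jcol) = ...
!       END DO
!    END DO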

! **************************************************************************************************
!> \brief Copy the content of a full matrix into another full matrix of the same size.
!> \param source source matrix
!> \param destination destination matrix
!> \author Joost VandeVondele
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination

      INTEGER :: npcol, nprow

      nprow = source%matrix_struct%context%num_pe(1)
      npcol = source%matrix_struct%context%num_pe(2)

      IF (.NOT. cp2k_is_parallel .OR. &
          cp_fm_struct_equivalent(source%matrix_struct, &
                                  destination%matrix_struct)) THEN
         IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
             SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
            cpabort("internal local_data has different sizes")
         CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
      ELSE
         IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
            cpabort("cannot copy between full matrices of different sizes")
         IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
            cpabort("cannot copy between full matrices of different sizes")
#if defined(__SCALAPACK)
         CALL pzcopy(source%matrix_struct%nrow_global* &
                     source%matrix_struct%ncol_global, &
                     source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
                     destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
#else
         cpabort("")
#endif
      END IF
   END SUBROUTINE cp_cfm_to_cfm_matrix

! **************************************************************************************************
!> \brief Copy a number of sequential columns of a full matrix into another full matrix.
!> \param msource source matrix
!> \param mtarget destination matrix
!> \param ncol number of columns to copy
!> \param source_start global index of the first column to copy within the source matrix
!> \param target_start global index of the first column to copy within the destination matrix
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
                                    target_start)

      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
      INTEGER, INTENT(IN) :: ncol
      INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_to_cfm_columns'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: a, b
      INTEGER :: handle, n, ss, ts
#if defined(__SCALAPACK)
      INTEGER :: i
      INTEGER, DIMENSION(9) :: desca, descb
#endif

      CALL timeset(routinen, handle)

      ss = 1
      ts = 1

      IF (PRESENT(source_start)) ss = source_start
      IF (PRESENT(target_start)) ts = target_start

      n = msource%matrix_struct%nrow_global

      a => msource%local_data
      b => mtarget%local_data

#if defined(__SCALAPACK)
      desca(:) = msource%matrix_struct%descriptor(:)
      descb(:) = mtarget%matrix_struct%descriptor(:)
      DO i = 0, ncol - 1
         CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
      END DO
#else
      CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
#endif

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_to_cfm_columns
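
! Sketch: both specific routines above are reached through the generic
! cp_cfm_to_cfm interface declared at the top of this module:
!
!    CALL cp_cfm_to_cfm(msource, mtarget) ! copy the whole matrix
!    CALL cp_cfm_to_cfm(msource, mtarget, ncol=10, &
!                       source_start=1, target_start=5) ! copy 10 columns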

! **************************************************************************************************
!> \brief Copy just the upper or lower triangular part of a full matrix.
!> \param msource source matrix
!> \param mtarget target matrix
!> \param uplo 'U' for upper triangular, 'L' for lower triangular
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
      CHARACTER(len=*), INTENT(IN) :: uplo

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_to_cfm_triangular'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
      INTEGER :: handle, ncol, nrow
#if defined(__SCALAPACK)
      INTEGER, DIMENSION(9) :: desca, descb
#endif

      CALL timeset(routinen, handle)

      nrow = msource%matrix_struct%nrow_global
      ncol = msource%matrix_struct%ncol_global

      aa => msource%local_data
      bb => mtarget%local_data

#if defined(__SCALAPACK)
      desca(:) = msource%matrix_struct%descriptor(:)
      descb(:) = mtarget%matrix_struct%descriptor(:)
      CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
#else
      CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
#endif

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_to_cfm_triangular

! **************************************************************************************************
!> \brief Copy the real and imaginary parts of a complex full matrix into
!>        separate real-valued full matrices.
!> \param msource complex matrix
!> \param mtargetr (optional) real part of the source matrix
!> \param mtargeti (optional) imaginary part of the source matrix
!> \note
!>      Matrix structures are assumed to be equivalent.
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)

      TYPE(cp_cfm_type), INTENT(IN) :: msource
      TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: mtargetr, mtargeti

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_to_fm'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
      INTEGER :: handle
      REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat

      CALL timeset(routinen, handle)

      zmat => msource%local_data
      IF (PRESENT(mtargetr)) THEN
         rmat => mtargetr%local_data
         IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
             (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
            cpabort("size of local_data of mtargetr differs from msource")
         END IF
         ! copy local data
         rmat = real(zmat, kind=dp)
      ELSE
         NULLIFY (rmat)
      END IF
      IF (PRESENT(mtargeti)) THEN
         imat => mtargeti%local_data
         IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
             (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
            cpabort("size of local_data of mtargeti differs from msource")
         END IF
         ! copy local data
         imat = real(aimag(zmat), kind=dp)
      ELSE
         NULLIFY (imat)
      END IF

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_to_fm

! **************************************************************************************************
!> \brief Construct a complex full matrix by taking its real and imaginary parts from
!>        two separate real-valued full matrices.
!> \param msourcer (optional) real part of the complex matrix (defaults to 0.0)
!> \param msourcei (optional) imaginary part of the complex matrix (defaults to 0.0)
!> \param mtarget resulting complex matrix
!> \note
!>      Matrix structures are assumed to be equivalent.
! **************************************************************************************************
   SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
      TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: msourcer, msourcei
      TYPE(cp_cfm_type), INTENT(IN) :: mtarget

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_fm_to_cfm'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
      INTEGER :: handle, mode
      REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat

      CALL timeset(routinen, handle)

      mode = 0
      zmat => mtarget%local_data
      IF (PRESENT(msourcer)) THEN
         rmat => msourcer%local_data
         IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
             (SIZE(rmat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(rmat, 2) .NE. SIZE(zmat, 2))) THEN
            cpabort("size of local_data of msourcer differs from mtarget")
         END IF
         mode = mode + 1
      ELSE
         NULLIFY (rmat)
      END IF
      IF (PRESENT(msourcei)) THEN
         imat => msourcei%local_data
         IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
             (SIZE(imat, 1) .NE. SIZE(zmat, 1)) .OR. &
             (SIZE(imat, 2) .NE. SIZE(zmat, 2))) THEN
            cpabort("size of local_data of msourcei differs from mtarget")
         END IF
         mode = mode + 2
      ELSE
         NULLIFY (imat)
      END IF
      ! copy local data
      SELECT CASE (mode)
      CASE (0)
         zmat(:, :) = z_zero
      CASE (1)
         zmat(:, :) = cmplx(rmat(:, :), 0.0_dp, kind=dp)
      CASE (2)
         zmat(:, :) = cmplx(0.0_dp, imat(:, :), kind=dp)
      CASE (3)
         zmat(:, :) = cmplx(rmat(:, :), imat(:, :), kind=dp)
      END SELECT

      CALL timestop(handle)

   END SUBROUTINE cp_fm_to_cfm
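
! Sketch: cp_cfm_to_fm and cp_fm_to_cfm are inverse operations. Given real
! matrices 're_part' and 'im_part' (illustrative names) whose structures are
! equivalent to that of the complex matrix 'cmat':
!
!    CALL cp_cfm_to_fm(cmat, mtargetr=re_part, mtargeti=im_part) ! split
!    CALL cp_fm_to_cfm(msourcer=re_part, msourcei=im_part, mtarget=cmat) ! rebuild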

! **************************************************************************************************
!> \brief Initiate the copy operation: get distribution data, post MPI isend and irecvs.
!> \param source input complex-valued fm matrix
!> \param destination output complex-valued fm matrix
!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
!>        of the input and output matrices
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env
      TYPE(copy_cfm_info_type), INTENT(out) :: info

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_start_copy_general'

      INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
                 ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
                 nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
                 recv_rank, recv_size, send_rank, send_size
      INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
                                            recv_count, send_count, send_disp, &
                                            source2global, src_p, src_q
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
      INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
                               src_block, src_block_tmp, src_num_pe
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
                                        send_col_indices, send_row_indices
      TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
      TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req

      CALL timeset(routinen, handle)

      IF (.NOT. cp2k_is_parallel) THEN
         ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
         nrow_local_send = SIZE(source%local_data, 1)
         ncol_local_send = SIZE(source%local_data, 2)
         ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
         k = 0
         DO j = 1, ncol_local_send
            DO i = 1, nrow_local_send
               k = k + 1
               info%send_buf(k) = source%local_data(i, j)
            END DO
         END DO
      ELSE
         NULLIFY (recv_dist, send_dist)
         NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)

         ! The 'global' communicator contains both the source and destination decompositions
         global_size = para_env%num_pe
         global_rank = para_env%mepos

         ! The source/send and destination/recv decompositions may only exist
         ! on a subset of the processes involved in the communication.
         ! Check if the source and/or destination arguments are .not. ASSOCIATED():
         ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            recv_dist => destination%matrix_struct
            recv_rank = recv_dist%para_env%mepos
         ELSE
            recv_rank = mp_proc_null
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            send_dist => source%matrix_struct
            send_rank = send_dist%para_env%mepos
         ELSE
            send_rank = mp_proc_null
         END IF

         ! Map the rank in the source/dest communicator to the global rank
         ALLOCATE (all_ranks(0:global_size - 1))

         CALL para_env%allgather(send_rank, all_ranks)
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            ALLOCATE (source2global(0:count(all_ranks .NE. mp_proc_null) - 1))
            DO i = 0, global_size - 1
               IF (all_ranks(i) .NE. mp_proc_null) THEN
                  source2global(all_ranks(i)) = i
               END IF
            END DO
         END IF

         CALL para_env%allgather(recv_rank, all_ranks)
         IF (ASSOCIATED(source%matrix_struct)) THEN
            ALLOCATE (dest2global(0:count(all_ranks .NE. mp_proc_null) - 1))
            DO i = 0, global_size - 1
               IF (all_ranks(i) .NE. mp_proc_null) THEN
                  dest2global(all_ranks(i)) = i
               END IF
            END DO
         END IF
         DEALLOCATE (all_ranks)

         ! Some data from the two decompositions will be needed by all processes in the global group:
         ! process grid shape, block size, and the BLACS-to-MPI mapping

         ! The global root process will receive the data (from the root process in each decomposition)
         send_req(:) = mp_request_null
         IF (global_rank == 0) THEN
            recv_req(:) = mp_request_null
            CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
            CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
            CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
            CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            IF ((send_rank == 0)) THEN
               ! need to use separate buffers here in case this is actually global rank 0
               src_block_tmp = (/send_dist%nrow_block, send_dist%ncol_block/)
               CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
               CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
            END IF
         END IF

         IF (ASSOCIATED(destination%matrix_struct)) THEN
            IF ((recv_rank == 0)) THEN
               dest_block_tmp = (/recv_dist%nrow_block, recv_dist%ncol_block/)
               CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
               CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
            END IF
         END IF

         IF (global_rank == 0) THEN
            CALL mp_waitall(recv_req(1:4))
            ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
            ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
                      dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
            CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
            CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            IF ((send_rank == 0)) THEN
               CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
            END IF
         END IF

         IF (ASSOCIATED(destination%matrix_struct)) THEN
            IF ((recv_rank == 0)) THEN
               CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
            END IF
         END IF

         IF (global_rank == 0) THEN
            CALL mp_waitall(recv_req(5:6))
         END IF

         ! Finally, broadcast the data to all processes in the global communicator
         CALL para_env%bcast(src_block, 0)
         CALL para_env%bcast(dest_block, 0)
         CALL para_env%bcast(src_num_pe, 0)
         CALL para_env%bcast(dest_num_pe, 0)
         info%src_num_pe(1:2) = src_num_pe(1:2)
         info%nblock_src(1:2) = src_block(1:2)
         IF (global_rank /= 0) THEN
            ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
                      dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
         END IF
         CALL para_env%bcast(info%src_blacs2mpi, 0)
         CALL para_env%bcast(dest_blacs2mpi, 0)

         recv_size = dest_num_pe(1)*dest_num_pe(2)
         send_size = src_num_pe(1)*src_num_pe(2)
         info%send_size = send_size
         CALL mp_waitall(send_req(:))

         ! Setup is now complete, we can start the actual communication here.
         ! The order implemented here is:
         !  DEST_1
         !    compute recv sizes
         !    call irecv
         !  SRC_1
         !    compute send sizes
         !    pack send buffers
         !    call isend
         !  DEST_2
         !    wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
         !  SRC_2
         !    wait for the sends

         ! DEST_1
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
                                  col_indices=recv_col_indices)
            info%recv_col_indices => recv_col_indices
            info%recv_row_indices => recv_row_indices
            nrow_block_src = src_block(1)
            ncol_block_src = src_block(2)
            ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))

            ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
            nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
            ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
            info%nlocal_recv(1) = nrow_local_recv
            info%nlocal_recv(2) = ncol_local_recv
            ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
            ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
            DO i = 1, nrow_local_recv
               ! For each local row we will receive, we look up its global row (in recv_row_indices),
               ! then work out which row block it comes from, and which process row that row block comes from.
               src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
            END DO
            DO j = 1, ncol_local_recv
               ! Similarly for the columns
               src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
            END DO
            ! src_p/q now contains the process row/column ID that will send data to that row/column

            DO q = 0, src_num_pe(2) - 1
               ncols = count(src_q .EQ. q)
               DO p = 0, src_num_pe(1) - 1
                  nrows = count(src_p .EQ. p)
                  ! Use the send_dist here as we are looking up the processes where the data comes from
                  recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
               END DO
            END DO
            DEALLOCATE (src_p, src_q)

            ! Use one long buffer (and displacements into that buffer);
            ! this prevents the need for a rectangular array where not all elements will be populated
            ALLOCATE (info%recv_buf(sum(recv_count(:))))
            info%recv_disp(0) = 0
            DO i = 1, send_size - 1
               info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
            END DO

            ! Issue receive calls on ranks which expect data
            DO k = 0, send_size - 1
               IF (recv_count(k) .GT. 0) THEN
                  CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
                                      source2global(k), info%recv_request(k))
               ELSE
                  info%recv_request(k) = mp_request_null
               END IF
            END DO
            DEALLOCATE (source2global)
         END IF ! ASSOCIATED(destination)

         ! SRC_1
         IF (ASSOCIATED(source%matrix_struct)) THEN
            CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
                                  col_indices=send_col_indices)
            nrow_block_dest = dest_block(1)
            ncol_block_dest = dest_block(2)
            ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))

            ! Determine the send counts, allocate the send buffers
            nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
            ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))

            ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local)
            ! i.e. number of rows,cols in the sending distribution
            ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))

            DO i = 1, nrow_local_send
               ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
               dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
            END DO
            DO j = 1, ncol_local_send
               dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
            END DO
            ! dest_p/q now contain the process row/column ID that will receive data from that row/column

            DO q = 0, dest_num_pe(2) - 1
               ncols = count(dest_q .EQ. q)
               DO p = 0, dest_num_pe(1) - 1
                  nrows = count(dest_p .EQ. p)
                  send_count(dest_blacs2mpi(p, q)) = nrows*ncols
               END DO
            END DO
            DEALLOCATE (dest_p, dest_q)

            ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
            ALLOCATE (info%send_buf(sum(send_count(:))))
            send_disp(0) = 0
            DO k = 1, recv_size - 1
               send_disp(k) = send_disp(k - 1) + send_count(k - 1)
            END DO

            ! Loop over the source matrix, pack the send buffers
            send_count(:) = 0
            DO j = 1, ncol_local_send
               ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
               dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
               DO i = 1, nrow_local_send
                  dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
                  mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
                  send_count(mpi_rank) = send_count(mpi_rank) + 1
                  info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
               END DO
            END DO

            ! For each non-zero send_count, call mpi_isend
            DO k = 0, recv_size - 1
               IF (send_count(k) .GT. 0) THEN
                  CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
                                      dest2global(k), info%send_request(k))
               ELSE
                  info%send_request(k) = mp_request_null
               END IF
            END DO
            DEALLOCATE (send_count, send_disp, dest2global)
         END IF ! ASSOCIATED(source)
         DEALLOCATE (dest_blacs2mpi)

      END IF ! IF (.NOT. cp2k_is_parallel)

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_start_copy_general

! **************************************************************************************************
!> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
!> \param destination output cfm matrix
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_finish_copy_general(destination, info)
      TYPE(cp_cfm_type), INTENT(IN) :: destination
      TYPE(copy_cfm_info_type), INTENT(inout) :: info

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_finish_copy_general'

      INTEGER :: handle, i, j, k, mpi_rank, ni, nj, &
                 src_q_j
      INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count, src_p_i
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices

      CALL timeset(routinen, handle)

      IF (.NOT. cp2k_is_parallel) THEN
         ! Now unpack the data from the 'send buffer'
         k = 0
         DO j = 1, SIZE(destination%local_data, 2)
            DO i = 1, SIZE(destination%local_data, 1)
               k = k + 1
               destination%local_data(i, j) = info%send_buf(k)
            END DO
         END DO
         DEALLOCATE (info%send_buf)
      ELSE
         ! Set up local variables ...
         recv_col_indices => info%recv_col_indices
         recv_row_indices => info%recv_row_indices

         ! ... use the local variables to do the work
         ! DEST_2
         CALL mp_waitall(info%recv_request(:))

         nj = info%nlocal_recv(2)
         ni = info%nlocal_recv(1)
         ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
         ! Loop over the local data, filling it in with data from the recv buffers
         ! (here the block sizes and num_pes refer to the distribution of the source matrix)
         recv_count(:) = 0
         DO i = 1, ni
            src_p_i(i) = mod(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
         END DO

         DO j = 1, nj
            src_q_j = mod(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
            DO i = 1, ni
               mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
               recv_count(mpi_rank) = recv_count(mpi_rank) + 1
               destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
            END DO
         END DO

         DEALLOCATE (recv_count, src_p_i)
         ! Invalidate the stored state
         NULLIFY (info%recv_col_indices, info%recv_row_indices)
         DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
      END IF

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_finish_copy_general

! **************************************************************************************************
!> \brief Complete the copy operation: wait for comms, clean up MPI state.
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_cleanup_copy_general(info)
      TYPE(copy_cfm_info_type), INTENT(inout) :: info

      CHARACTER(len=*), PARAMETER :: routinen = 'cp_cfm_cleanup_copy_general'

      INTEGER :: handle

      CALL timeset(routinen, handle)

      IF (cp2k_is_parallel) THEN
         ! SRC_2
         ! If this process is also in the destination decomposition, this deallocation
         ! was already done in cp_cfm_finish_copy_general
         IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
         CALL mp_waitall(info%send_request(:))
         DEALLOCATE (info%send_request, info%send_buf)
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_cleanup_copy_general
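
! Sketch of the three-phase copy protocol implemented by the three routines
! above; the non-blocking transfers may overlap with other work between the calls:
!
!    TYPE(copy_cfm_info_type) :: info
!    CALL cp_cfm_start_copy_general(source, destination, para_env, info)
!    ! ... other work while the messages are in flight ...
!    CALL cp_cfm_finish_copy_general(destination, info) ! on receiving processes
!    CALL cp_cfm_cleanup_copy_general(info) ! on sending processes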
END MODULE cp_cfm_types